Setup

library(conos)
library(magrittr)
library(dplyr)
library(cacoa) # github.com/kharchenkolab/cacoa
## Warning: replacing previous import 'ape::where' by 'dplyr::where' when loading
## 'cacoa'
library(sccore)
library(scHelper) # github.com/rrydbirk/scHelper
library(qs)
library(ggplot2)
library(ggsci)
library(cowplot)
library(ggpubr)
library(STRINGdb)
library(reshape2)
library(ggforce)
library(ggpmisc)
library(RColorBrewer)
library(CRMetrics)
library(ggrepel)
library(circlize)
library(ComplexHeatmap)
library(stringr)
library(rstatix)
library(slingshot)
library(gamm4)
library(scITD)
library(ggraph)

## Comparisons
comp <- list(c("CTRL","MSA"),
             c("CTRL","PD"),
             c("MSA","PD"))

comp.long <- list(c("CTRL vs. MSA","CTRL vs. PD"),
                  c("CTRL vs. MSA","PD vs. MSA"),
                  c("CTRL vs. PD","PD vs. MSA"))

# Palettes

## Major celltypes
anno.neurons <- qread("anno_neurons.qs")
anno.neurons.major <- anno.neurons %>% 
  collapseAnnotation("GABA") %>% 
  collapseAnnotation("GLU") %>% 
  renameAnnotation("GABA", "Inh. neurons") %>% 
  renameAnnotation("GLU", "Exc. neurons") %>% 
  collapseAnnotation("MSN")
## Collapsing 11 labels containing 'GABA' in their name into one label.
## Collapsing 2 labels containing 'GLU' in their name into one label.
## Collapsing 3 labels containing 'MSN' in their name into one label.
anno <- qread("anno_major.qs")

anno.major <- anno[!names(anno) %in% names(anno.neurons.major) & !anno == "Neurons"] %>% 
  {factor(c(., anno.neurons.major))}

pal.major <- brewer.pal(n = 10, "Set1") %>% 
  c("lightblue3") %>% 
  setNames(levels(anno.major)[c(9,8,3,5,10,6:7,2,1,4)])
## Warning in brewer.pal(n = 10, "Set1"): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Condition
pal.major %<>% c(c("yellow", brewer.pal(n = 6, "Accent")[5:6]) %>% setNames(c("CTRL","PD","MSA")))

## Comparisons
pal.major %<>% c(brewer.pal(n = 10, "Paired")[c(6,8,10)] %>% setNames(c("CTRL vs. MSA","CTRL vs. PD","PD vs. MSA")))

## Neurons
pal.major %<>% c(setNames(c("navy",
                            "mediumblue",
                            "lightslateblue",
                            "lightskyblue",
                            "lightblue",
                            "lightseagreen",
                            "blue",
                            "cadetblue1",
                            "cyan",
                            "cyan4",
                            "aquamarine2",
                            "lightcoral",
                            "brown1",
                            "orange",
                            "darkorange4",
                            "lightgoldenrod3"), 
                          levels(anno.neurons)))

## Glia
anno.glia <- c("anno_astro.qs",
               "anno_oligo.qs",
               "anno_opc.qs") %>% 
  lapply(qread) %>% 
  Reduce(c, .) %>% 
  factor() %>% 
  renameAnnotation("Homeostatic_astrocytes", "AS_homeostatic") %>% 
  renameAnnotation("Reactive_astrocytes", "AS_reactive") %>% 
  renameAnnotation("Homeostatic_LINC01608", "OL_LINC01608") %>%
  renameAnnotation("Homeostatic_SLC5A11", "OL_SLC5A11") %>% 
  renameAnnotation("Reactive_SCGZ", "OL_SGCZ")

pal.major %<>% c(setNames(c("pink",
                            pal.major["Astrocytes"],
                            pal.major["Oligodendrocytes"],
                            "chartreuse",
                            "darkolivegreen",
                            "brown4",
                            "coral"),
                          levels(anno.glia)))

## Microglia
anno.micro <- qread("anno_micro_pvm.qs") %>% 
  renameAnnotation("Steady-state","MIC_steady-state") %>% 
  renameAnnotation("Intermediate1","MIC_intermediate1") %>% 
  renameAnnotation("Intermediate2","MIC_intermediate2") %>% 
  renameAnnotation("Activated","MIC_activated")

pal.major %<>% c(setNames(c(pal.major["Microglia"],
                            "maroon4",
                            "magenta",
                            "pink3",
                            pal.major["PVMs"]),
                          levels(anno.micro)))

## Phago. assay
pal.major %<>% c(setNames(c("purple", 
                            "black",
                            pal.major[c("CTRL","PD","MSA")]), 
                          c("PBS",
                            "LPS",
                            "CTRL CSF",
                            "PD CSF",
                            "MSA CSF")))

## Houston assay
pal.major %<>% c(setNames(c(pal.major["PBS"],
                            pal.major["CTRL"],
                            "yellow3",
                            "orange",
                            pal.major["PD"],
                            "cyan4",
                            "navyblue",
                            pal.major["MSA"],
                            "pink3",
                            "red4"), 
                          c("Microglia medium",
                            "CTRL CSF, no dil.",
                            "CTRL CSF, 1:2 dil.",
                            "CTRL CSF, 1:4 dil.",
                            "PD CSF, no dil.",
                            "PD CSF, 1:2 dil.",
                            "PD CSF, 1:4 dil.",
                            "MSA CSF, no dil.",
                            "MSA CSF, 1:2 dil.",
                            "MSA CSF, 1:4 dil.")))

# Load major Conos object
con <- qread("con_major.qs", nthreads = 10)

tt <- Sys.time()

Define helper functions

getOntWithFamily <- function(cao, comp, name = "GSEA") {
  fams <- cao$test.results[[name]]$families
  
  df.all <- list(cao$.__enclos_env__$private$getOntologyPvalueResults(name = name, genes = "up", p.adj = 1, q.value = 1),
                 cao$.__enclos_env__$private$getOntologyPvalueResults(name = name, genes = "down", p.adj = 1, q.value = 1)) %>% 
    bind_rows() %>% 
    mutate(logp = -log10(p.adjust),
           comp = comp)
  
  df <- cacoa:::getOntologyFamilyChildren(df.all, fams=fams, type = name, subtype = "BP")
  
  return(list(df.all = df.all,
              df = df,
              fams = fams))
}
lianaCircos <- function(df,
                        top.interactions = 30, 
                        text.size = 1, 
                        pal = pal.major, 
                        cell.types = c("Astrocytes", 
                                       "Immune", 
                                       "Microglia",
                                       "Neurons",
                                       "OPCs",
                                       "Oligodendrocytes",
                                       "PVMs",
                                       "Pericytes/endothelial"),
                        big.gap = 5,
                        small.gap = 2,
                        arrow.width = 3,
                        link.ramp.rel = T,
                        link.sort = F,
                        scale = F,
                        arrow.head.width = 0.3,
                        arrow.head.length = 0.3,
                        link.ramp.col = c("navy", "grey", "firebrick")) {
  input_df <- df %>% 
    slice_max(order_by = score, n = top.interactions) %>% 
    mutate(target = paste0(target, " ")) %>% 
    mutate(source_lig = paste0(source, "|", ligand), 
           target_rec = paste0(target, "|", receptor))
  
  if (link.ramp.rel) {
    arr_wd <- rep(arrow.width, nrow(input_df))
  } else {
    arr_wd <- (((input_df$score - min(input_df$score))/(max(input_df$score) - min(input_df$score))) * (arrow.width)) + 1
  }
  
  # Colors and segments
  anno.col <- setNames(pal, 
                       cell.types) %>% 
    c(., "Oligodendrocytes " = unname(.["Oligodendrocytes"]))
  
  cell_cols <- anno.col[unique(c(unique(input_df$source), unique(input_df$target), "Oligodendrocytes "))]
  
  link_cols <- c()
  
  if (!link.ramp.rel) {
    for (i in input_df$source_lig) {
      link_cols <- c(link_cols, cell_cols[str_extract(i, 
                                                      "[^|]+")])
    }
  } else {
    input_df %<>% 
      arrange(score)
    
    df.down <- input_df %>% filter(score <= 0)
    link_down <- colorRampPalette(c(link.ramp.col[1], link.ramp.col[2]))(nrow(df.down))
    
    df.up <- input_df %>% filter(score > 0)
    link_up <- colorRampPalette(c(link.ramp.col[2], link.ramp.col[3]))(nrow(df.up))
    
    link_cols <- c(link_down, link_up)
  }
  
  segments <- unique(c(paste0(input_df$source, "|", input_df$ligand), 
                       paste0(input_df$target, "|", input_df$receptor)))
  
  grp <- str_extract(segments, "[^|]+") %>% 
    setNames(segments)
  
  # Redo colors
  cell_cols2 <- grp
  for (i in unique(grp)) {
    cell_cols2[cell_cols2 == i] <- cell_cols[i]
  }
  
  # Plot
  input_df %>% 
    select(source_lig, target_rec, score) %>%
    chordDiagram(directional = 1, 
                 group = grp,
                 scale = scale, 
                 diffHeight = 0.005, 
                 direction.type = c("arrows"),
                 link.arr.type = "triangle", 
                 annotationTrack = c(), 
                 preAllocateTracks = list(
                   list(track.height = 0.05),
                   list(track.height = 0.25),
                   list(track.height = 0.05)), 
                 big.gap = big.gap,
                 transparency = 1,
                 link.arr.lwd = arr_wd,
                 link.arr.col = link_cols,
                 link.arr.length = arrow.head.length,
                 link.arr.width = arrow.head.width,
                 small.gap = small.gap
    )
  
  circos.track(track.index = 2, panel.fun = function(x, y) {
    circos.text(CELL_META$xcenter, 
                CELL_META$ylim[1], 
                str_extract(CELL_META$sector.index, "[^|]+$"), 
                facing = "clockwise", 
                niceFacing = TRUE, 
                adj = c(0, 0.55), 
                cex = 1)
  }, bg.border = NA)
  
  # Split segments
  for (l in segments) {
    highlight.sector(l, track.index = 3, col = cell_cols2[l])
  }
  
  # Add ligand/receptor track
  ## Ligand
  highlight.sector(input_df$source_lig, 
                   track.index = 1, 
                   col = "black", 
                   text = "Ligands", 
                   cex = 1, 
                   text.col = "white", 
                   niceFacing = TRUE)
  ## Receptor
  highlight.sector(input_df$target_rec, 
                   track.index = 1, 
                   col = "white", 
                   text = "Receptors", 
                   cex = 1, 
                   text.col = "black", 
                   border = "black", 
                   niceFacing = TRUE)
  
  # Legends
  minmax <- input_df %>% 
    pull(score) %>% 
    {pmax(abs(min(.)), max(.))} %>% 
    formatC(digits = 1) %>% 
    as.numeric()
  
  col.range = c(-minmax, 0, minmax)
  lgd_links = Legend(at = col.range, 
                     col_fun = colorRamp2(col.range, link.ramp.col), 
                     title_position = "topleft", 
                     title = "Links")
  
  lgd_ct <- Legend(labels = unique(c(input_df$source, input_df$target)), 
                   title = "Cell type", 
                   type = "points",
                   legend_gp = gpar(col = "transparent"), 
                   background = cell_cols[unique(c(input_df$source, input_df$target))])
  
  lgd_list_vertical = packLegend(lgd_ct, lgd_links)
  
  draw(lgd_list_vertical, 
       just = c("left", "bottom"), 
       x = unit(5, "mm"), 
       y = unit(5, "mm"))
  
  circos.clear()
}
getTscanTrajectory <- function(con, anno) {
  requireNamespace("TSCAN", quietly = T)
  
  emb <- con$embedding[names(anno), ]
  anno %<>% .[rownames(emb)]
  
  cent.ids <- emb %>% 
    rownames() %>% 
    split(anno)
  
  centroids <- cent.ids %>% 
    lapply(\(cid) emb[cid, ]) %>% 
    lapply(colMeans) %>% 
    bind_rows() %>% 
    t() %>% 
    `colnames<-`(c("UMAP1","UMAP2"))
  
  mst <- centroids %>% 
    TSCAN::createClusterMST(clusters = NULL)
  
  line.data <- TSCAN::reportEdges(centroids, mst = mst, clusters = NULL)
  
  return(line.data)
}
plotGenePseudoBulk <- function(gene, cm.pseudo, legend = T) {
  idx <- cm.pseudo %>% 
    colnames() %>% 
    data.frame(id = .) %>% 
    mutate(condition = strsplit(id, "_|!!") %>% sget(1),
           ct = strsplit(id, "!!") %>% sget(2)) %>% 
    mutate(ord = order(condition, ct))
  
  x <- cm.pseudo %>% 
    .[match(gene, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
    .[idx$ord]
  
  plot.dat <- x %>% 
    {data.frame(sample = names(.), 
                value = unname(.))} %>%
    mutate(anno = strsplit(sample, "!!") %>% 
             sget(2),
           condition = strsplit(sample, "!!|_") %>% 
             sget(1)) %>%
    mutate(anno = factor(anno))
  
  stat.test <- plot.dat %>% 
    group_by(anno) %>% 
    rstatix::wilcox_test(value ~ condition) %>% 
    filter(p.adj <= 0.05) %>% 
    rstatix::add_xy_position(x = "anno", step.increase = 0.05)
  
  omnibus.test <- plot.dat %>% 
    group_by(anno) %>%
    rstatix::kruskal_test(value ~ condition) %>% 
    filter(p <= 0.05) %>% 
    mutate(p = formatC(p, digits = 2))
  
  p <- plot.dat %>% 
    ggplot(aes(anno, value)) +
    geom_boxplot(aes(fill = condition)) +
    theme_bw() +
    theme(line = element_blank(),
          axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
          axis.ticks.x = element_line()) +
    labs(x = "", y = "Normalized pseudobulk expression", fill = "", title = paste0(gene, " expression")) +
    scale_fill_manual(values = pal.major) +
    geom_text(data = omnibus.test, aes(anno, y = pmin(max(plot.dat$value) * 1.05, max(plot.dat$value) + 10), label = p), size = 3) +
    stat_pvalue_manual(stat.test, hide.ns = T, label = "p.adj", size = 3)
  
  if (!legend) p <- p + guides(fill = "none")
  
  return(p)
}

Figure 1

Load data

cao_msa <- qread("cao_major_msa.qs", nthreads = 10)
cao_msa$plot.theme <- theme_bw()
cao_pd <- qread("cao_major_pd.qs", nthreads = 10)
cao_pd$plot.theme <- theme_bw()
cao_dis <- qread("cao_major_dis.qs", nthreads = 10)
cao_dis$plot.theme <- theme_bw()

Figure 1b

con$plotGraph(groups = anno.major, 
              embedding = "UMAP_1_0.001_5",
              size = 0.1, 
              palette = pal.major, 
              font.size = 3, 
              raster = T, 
              show.labels = T,
              plot.na = F, 
              show.legend = T,
              legend.title = "Cell type", 
              alpha = 0.05) + 
  dotSize(3) +
  labs(x="UMAP1", y= "UMAP2") +
  theme(line = element_blank()) +
  ylim(c(-20, 15)) +
  xlim(c(-21, 11)) -> p

p

Export source data

p$data %>% 
  write.table("source_data/F1b.tsv", sep = "\t", dec = ".", row.names = F)

Figure 1c

cm.merged <- con$getJointCountMatrix()

markers <- c("AQP4","PTPRC","CSF1R","RBFOX3","SLC17A7","GAD1","PPP1R1B","MOG","VCAN","PDGFRB","MRC1")

dotPlot(markers, 
        cm.merged, 
        anno.major %>% 
          factor(., levels = sort(levels(.))[c(1,3,5,2,4,6,7:10)]), 
        cols = c("white","firebrick"), 
        gene.order = markers) -> p
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the sccore package.
##   Please report the issue at <https://github.com/kharchenkolab/sccore/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p

Export source data

p$data %>% 
  write.table("source_data/F1c.tsv", sep = "\t", dec = ".", row.names = F)

Figure 1d

spc <- con$getDatasetPerCell()

cpc <- spc %>% 
  as.character() %>% 
  grepl.replace(c("CTRL","MSA","PD")) %>% 
  `names<-`(names(spc))

con$plotGraph(groups = cpc, 
              embedding = "UMAP_1_0.001_5",
              size = 0.1, 
              alpha = 0.05,
              palette = pal.major, 
              font.size = 3, 
              raster = F, 
              show.labels = T,
              plot.na = F,
              mark.groups = F,
              show.legend = T) + 
  labs(x="UMAP1", y= "UMAP2", col = "") +
  theme(legend.position = "bottom",
        line = element_blank()) +
  dotSize(3) -> p

p

Export source data

p$data %>% 
  write.table("source_data/F1d.tsv", sep = "\t", dec = ".", row.names = F)

Figure 1e

cao_msa$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() +
  coord_flip() +
  theme_bw() +
  scale_fill_manual(values = pal.major) +
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1),
                     limits = c(-1,1),
                     labels = c("-1\nCTRL",0,"1\nMSA")) +
  labs(x = "", y = "") -> p

p

Export source data

p$data %>% 
  write.table("source_data/F1e.tsv", sep = "\t", dec = ".", row.names = F)

Figure 1f

cao_pd$plotCellLoadings(show.pvals = F, 
                        alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1.1),
                     labels = c("-1\nCTRL",0,"1\nPD")) +
  geom_vline(xintercept = 7.5, col = "red") +
  labs(x = "", y = "") -> p

p

Export source data

p$data %>% 
  write.table("source_data/F1f.tsv", sep = "\t", dec = ".", row.names = F)

Figure 1g

cao_dis$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nPD",0,"1\nMSA")) +
  geom_vline(xintercept = 5.5, col = "red") +
  labs(x = "", y = "") -> p

p

Export source data

p$data %>% 
  write.table("source_data/F1g.tsv", sep = "\t", dec = ".", row.names = F)

Figure 1h

cao_msa$cell.groups.palette <- pal.major
stat.p <- cao_msa$test.results$expression.shifts$padjust %>% 
  {data.frame(Type = names(.), padj = unname(.), value = 0.4)} %>% 
  mutate(padj = formatC(padj, digits = 3))
stat.p$padj[stat.p$padj > 0.05] <- ""
cao_msa$plotExpressionShiftMagnitudes(show.pvalues = "none") + 
  labs(y = "Normalized expression distance") +
  geom_text(data = stat.p, aes(label = padj)) -> p

p

Export source data

p$data %>% 
  write.table("source_data/F1h.tsv", sep = "\t", dec = ".", row.names = F)

Figure 1i

cao_pd$cell.groups.palette <- pal.major
stat.p <- cao_pd$test.results$expression.shifts$padjust %>% 
  {data.frame(Type = names(.), padj = unname(.), value = 0.35)} %>% 
  mutate(padj = formatC(padj, digits = 3))
stat.p$padj[stat.p$padj > 0.05] <- ""
cao_pd$plotExpressionShiftMagnitudes(show.pvalues = "none") + 
  labs(y = "Normalized expression distance") +
  geom_text(data = stat.p, aes(label = padj)) -> p

p

Export source data

p$data %>% 
  write.table("source_data/F1i.tsv", sep = "\t", dec = ".", row.names = F)

Figure 1j

cao_dis$cell.groups.palette <- pal.major
stat.p <- cao_dis$test.results$expression.shifts$padjust %>% 
  {data.frame(Type = names(.), padj = unname(.), value = 0.3)} %>% 
  mutate(padj = formatC(padj, digits = 3))
stat.p$padj[stat.p$padj > 0.05] <- ""
cao_dis$plotExpressionShiftMagnitudes(show.pvalues = "none") + 
  labs(y = "Normalized expression distance") +
  geom_text(data = stat.p, aes(label = padj)) -> p

p
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?

Export source data

p$data %>% 
  write.table("source_data/F1j.tsv", sep = "\t", dec = ".", row.names = F)

Figure 2

Load data

cao <- qread("cao_neurons.qs", nthreads = 10)
cao$plot.theme <- theme_bw()
cao_msa <- qread("cao_neurons_msa.qs", nthreads = 10)
cao_msa$plot.theme <- theme_bw()
cao_pd <- qread("cao_neurons_pd.qs", nthreads = 10)
cao_pd$plot.theme <- theme_bw()
cao_dis <- qread("cao_neurons_dis.qs", nthreads = 10)
cao_dis$plot.theme <- theme_bw()

Figure 2a

cao$plotEmbedding(groups = cao$cell.groups,
                  size = 0.1, 
                  palette = pal.major, 
                  font.size = 3, 
                  raster = T, 
                  show.labels = T,
                  plot.na = F,
                  alpha = 0.1) + 
  theme(line = element_blank()) +
  labs(x="largeVis1", y= "largeVis2") -> p

p

Export source data

p$data %>% 
  write.table("source_data/F2a.tsv", sep = "\t", dec = ".", row.names = F)

Figure 2b

cm.merged <- cao$data.object$getJointCountMatrix()

markers <- c("GAD1","GAD2","PPP1R1B","SLC17A7","CHAT","CALB2","DCC","ID2","MEIS2","ST18","NKX2-1","NR2F2","PAX6","PVALB","RXFP1","SST","VIP","RELN","SATB2","DRD1","DRD2","FOXP2", "LRP8", "VLDLR")

dotPlot(markers, 
        cm.merged, 
        anno.neurons, 
        cols = c("white","firebrick"), 
        gene.order = markers) -> p

p

Export source data

p$data %>% 
  write.table("source_data/F2b.tsv", sep = "\t", dec = ".", row.names = F)

Figure 2c

cao_msa$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nCTRL",0,"1\nMSA")) +
  labs(x = "", y = "") -> p

p

Export source data

p$data %>% 
  write.table("source_data/F2c.tsv", sep = "\t", dec = ".", row.names = F)

Figure 2d

cao_pd$plotCellLoadings(show.pvals = F, 
                        alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nCTRL",0,"1\nPD")) +
  geom_vline(xintercept = 13.5, col = "red") +
  labs(x = "", y = "") -> p

p

Export source data

p$data %>% 
  write.table("source_data/F2d.tsv", sep = "\t", dec = ".", row.names = F)

Figure 2e

cao_dis$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nPD",0,"1\nMSA")) +
  labs(x = "", y = "") -> p

p

Export source data

p$data %>% 
  write.table("source_data/F2e.tsv", sep = "\t", dec = ".", row.names = F)

Figure 2f

cao_msa$cell.groups.palette <- pal.major
cao_msa$plotExpressionShiftMagnitudes() + 
  labs(y = "Normalized expression distance") -> p1

cao_pd$cell.groups.palette <- pal.major
cao_pd$plotExpressionShiftMagnitudes() + 
  labs(y = "Normalized expression distance") -> p2

cao_dis$cell.groups.palette <- pal.major
cao_dis$plotExpressionShiftMagnitudes() + 
  labs(y = "Normalized expression distance") -> p3

p1
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?

p2
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?

p3
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?

Export source data

p1$data %>% 
  write.table("source_data/F2f1.tsv", sep = "\t", dec = ".", row.names = F)

p2$data %>% 
  write.table("source_data/F2f2.tsv", sep = "\t", dec = ".", row.names = F)

p3$data %>% 
  write.table("source_data/F2f3.tsv", sep = "\t", dec = ".", row.names = F)

Figure 2g

sample.groups <- cao$sample.groups

cao$estimateCellDensity(method = "graph")
cao$estimateDiffCellDensity(type = "subtract")

cao$plotCellDensity(show.cell.groups = F, show.legend = F, color.range = c(0, 0.00035))$CTRL +
  geom_circle(aes(x0 = 30.9, y0 = 10, r = 10)) +
  geom_circle(aes(x0 = 17.5, y0 = 24, r = 4), col = "cyan3") +
  theme(line = element_blank()) -> p

p

Export source data

p$data %>% 
  write.table("source_data/F2g1.tsv", sep = "\t", dec = ".", row.names = F)
# MSA
cao$sample.groups <- sample.groups %>% 
  names() %>% 
  grepl.replace(c("CTRL","MSA","PD")) %>% 
  `names<-`(sample.groups %>% names()) %>% 
  .[. %in% c("CTRL","MSA")]
cao$target.level <- "MSA"
cao$estimateCellDensity(method = "graph", name = "msa.density")
cao$estimateDiffCellDensity(type = "subtract", name = "msa.density")
cao$plotCellDensity(show.cell.groups = F, name = "msa.density", show.legend = F, color.range = c(0, 0.00035))$MSA +
  geom_circle(aes(x0 = 30.9, y0 = 10, r = 10)) +
  geom_circle(aes(x0 = 17.5, y0 = 24, r = 4), col = "cyan3") +
  theme(line = element_blank()) -> p

p

Export source data

p$data %>% 
  write.table("source_data/F2g2.tsv", sep = "\t", dec = ".", row.names = F)
# PD
cao$sample.groups <- sample.groups %>%
  names() %>%
  grepl.replace(c("CTRL","MSA","PD")) %>%
  `names<-`(sample.groups %>% names()) %>%
  .[. %in% c("CTRL","PD")]
cao$target.level <- "PD"
cao$estimateCellDensity(method = "graph", name = "pd.density")
cao$estimateDiffCellDensity(type = "subtract", name = "pd.density")
cao$plotCellDensity(show.cell.groups = F, name = "pd.density", show.legend = T, color.range = c(0, 0.00035))$PD +
  geom_circle(aes(x0 = 30.9, y0 = 10, r = 10)) +
  geom_circle(aes(x0 = 17.5, y0 = 24, r = 4), col = "cyan3") +
  theme(line = element_blank()) -> p

p

Export source data

p$data %>% 
  write.table("source_data/F2g3.tsv", sep = "\t", dec = ".", row.names = F)

Figure 3

Load data

con.glia <- qread("con_oligo_astro_opc.qs", nthreads = 10)

cao_msa <- qread("cao_oligo_astro_opc_msa.qs", nthreads = 10)
cao_msa$plot.theme <- theme_bw()
cao_pd <- qread("cao_oligo_astro_opc_pd.qs", nthreads = 10)
cao_pd$plot.theme <- theme_bw()
cao_dis <- qread("cao_oligo_astro_opc_dis.qs", nthreads = 10)
cao_dis$plot.theme <- theme_bw()

Figure 3a

con.glia$plotGraph(groups = anno.glia, 
                   plot.na = F, 
                   size = 0.1,
                   palette = pal.major, 
                   font.size = 3, 
                   raster = T, 
                   show.labels = T, embedding = "largeVis_CPCA_AS01",
                   alpha = 0.05) + 
  labs(x = "largeVis1", y = "largeVis2") +
  theme(line = element_blank()) -> p

p

Export source data

p$data %>% 
  write.table("source_data/F3a.tsv", sep = "\t", dec = ".", row.names = F)

Figure 3b

cm.merged <- con.glia$getJointCountMatrix()

markers <- c("AQP4","TNC","MOG","LINC01608","SLC5A11","SGCZ","VCAN","OLIG2")

dotPlot(markers, 
        cm.merged, 
        anno.glia,
        cols = c("white","firebrick"), 
        gene.order = markers) -> p

p

Export source data

p$data %>% 
  write.table("source_data/F3b.tsv", sep = "\t", dec = ".", row.names = F)

Figure 3c

cao_msa$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) +
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1.1,1),
                     labels = c("-1\nCTRL",0,"1\nMSA")) +
  geom_vline(xintercept = 6.5, col = "red") +
  labs(x = "", y = "") -> p

p

Export source data

p$data %>% 
  write.table("source_data/F3c.tsv", sep = "\t", dec = ".", row.names = F)

Figure 3d

cao_pd$plotCellLoadings(show.pvals = F, 
                        alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1.15,1),
                     labels = c("-1\nCTRL",0,"1\nPD")) +
  geom_vline(xintercept = 4.5, col = "red") +
  labs(x = "", y = "") -> p

p

Export source data

p$data %>% 
  write.table("source_data/F3d.tsv", sep = "\t", dec = ".", row.names = F)

Figure 3e

cao_dis$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1.1,1),
                     labels = c("-1\nPD",0,"1\nMSA")) +
  geom_vline(xintercept = 6.5, col = "red") +
  labs(x = "", y = "") -> p

p

Export source data

p$data %>% 
  write.table("source_data/F3e.tsv", sep = "\t", dec = ".", row.names = F)

Figure 3f

stat.p <- cao_msa$test.results$expression.shifts$padjust %>% 
  {data.frame(Type = names(.), padj = unname(.), value = 0.1)} %>% 
  mutate(padj = formatC(padj, digits = 3))
stat.p$padj[stat.p$padj > 0.05] <- ""
cao_msa$plotExpressionShiftMagnitudes(show.pvalues = "none") + 
  labs(y = "Normalized expression distance") +
  geom_text(data = stat.p, aes(label = padj)) -> p1

stat.p <- cao_pd$test.results$expression.shifts$padjust %>% 
  {data.frame(Type = names(.), padj = unname(.), value = 0.14)} %>% 
  mutate(padj = formatC(padj, digits = 3))
stat.p$padj[stat.p$padj > 0.05] <- ""
cao_pd$plotExpressionShiftMagnitudes(show.pvalues = "none") + 
  labs(y = "Normalized expression distance") +
  geom_text(data = stat.p, aes(label = padj)) -> p2

stat.p <- cao_dis$test.results$expression.shifts$padjust %>% 
  {data.frame(Type = names(.), padj = unname(.), value = 0.13)} %>% 
  mutate(padj = formatC(padj, digits = 3))
stat.p$padj[stat.p$padj > 0.05] <- ""
cao_dis$plotExpressionShiftMagnitudes(show.pvalues = "none") + 
  labs(y = "Normalized expression distance") +
  geom_text(data = stat.p, aes(label = padj)) -> p3

p1

p2

p3

Export source data

p1$data %>% 
  write.table("source_data/F3f1.tsv", sep = "\t", dec = ".", row.names = F)

p2$data %>% 
  write.table("source_data/F3f2.tsv", sep = "\t", dec = ".", row.names = F)

p3$data %>% 
  write.table("source_data/F3f3.tsv", sep = "\t", dec = ".", row.names = F)

Export source data

p$data %>% 
  write.table("source_data/F1j.tsv", sep = "\t", dec = ".", row.names = F)
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   11488150   613.6   19635608  1048.7   19635608  1048.7
## Vcells 3018272998 23027.6 6329956030 48293.8 6237672537 47589.7

Figure 4

Figure 4a,b

cao_msa <- qread("cao_astro_msa.qs", nthreads = 10)
cao_msa$plot.theme <- theme_bw()
cao_pd <- qread("cao_astro_pd.qs", nthreads = 10)
cao_pd$plot.theme <- theme_bw()
cao_dis <- qread("cao_astro_dis.qs", nthreads = 10)
cao_dis$plot.theme <- theme_bw()

df.all <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>% 
  lget("df.all") %>% 
  bind_rows() %>% 
  mutate(Group = paste0("AS_", strsplit(Group, "_") %>% sget(1) %>% tolower()))
## Loading required namespace: DOSE
## 
# Select relevant pathways
p.ids <- c("GO:0050821", "GO:0050728", "GO:0006986", "GO:0030168", "GO:0030001", "GO:1904707", "GO:0140053", "GO:0006486", "GO:0010717", "GO:0031113", "GO:0032675", "GO:0061041", "GO:0071222", "GO:0045766", "GO:0006487", "GO:0097501", "GO:0042776", "GO:0045047", "GO:0044794", "GO:0035633", "GO:0097242", "GO:0006120", "GO:0042026", "GO:0006979", "GO:0071222", "GO:0017015", "GO:0009611", "GO:1901201", "GO:0042594", "GO:0097501", "GO:0006123", "GO:0006909", "GO:0007179", "GO:0006120", "GO:0097501")

Figure 4a

ct <- "AS_homeostatic"

df.sel <- df.all %>% 
  filter(Group == ct)

p1 <- ggplot(df.sel, aes(NES, logp, col = comp, shape = comp)) +
  geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
  geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.8) +
  theme_bw() +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  scale_color_manual(values = pal.major) +
  theme(line = element_blank()) +
  labs(y = "-log10(adj. p)", shape = "Comparison", col = "Comparison") +
  dotSize(3)

df <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>% 
  lget("df") %>% 
  bind_rows() %>% 
  mutate(Group = paste0("AS_", strsplit(Group, "_") %>% sget(1) %>% tolower())) %>% 
  filter(Group == ct,
         ID %in% p.ids)

p2 <- df %>% 
  filter(p.adjust <= 0.05, NES > 0) %>% 
  mutate(Group = factor(Group, levels = sort(unique(Group)))) %>% 
  select(Description, NES, Group, comp) %>%
  as.data.frame() %>% 
  group_by(comp, Group) %>% 
  group_split() %>% 
  lapply(dplyr::slice, seq(5)) %>%
  bind_rows() %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = comp)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for up-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

p3 <- df %>% 
  filter(p.adjust <= 0.05, NES < 0) %>% 
  select(Description, NES, Group, comp) %>%
  group_by(comp, Group) %>% 
  group_split() %>% 
  lapply(dplyr::slice, seq(5)) %>% 
  bind_rows() %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = comp)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for down-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))

Export source data

p1$data %>% 
  write.table("source_data/F4a.tsv", sep = "\t", dec = ".", row.names = F)

Figure 4b

ct <- "AS_reactive"

df.sel <- df.all %>% 
  filter(Group == ct)

p1 <- ggplot(df.sel, aes(NES, logp, col = comp, shape = comp)) +
  geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
  geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.8) +
  theme_bw() +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  scale_color_manual(values = pal.major) +
  theme(line = element_blank()) +
  labs(y = "-log10(adj. p)", shape = "Comparison", col = "Comparison") +
  dotSize(3)

df <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>% 
  lget("df") %>% 
  bind_rows() %>% 
  mutate(Group = paste0("AS_", strsplit(Group, "_") %>% sget(1) %>% tolower())) %>% 
  filter(Group == ct,
         ID %in% p.ids)

p2 <- df %>% 
  filter(p.adjust <= 0.05, NES > 0) %>% 
  mutate(Group = factor(Group, levels = sort(unique(Group)))) %>% 
  select(Description, NES, Group, comp) %>%
  group_by(comp, Group) %>% 
  group_split() %>% 
  lapply(dplyr::slice, seq(5)) %>%
  bind_rows() %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = comp)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for up-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

p3 <- df %>% 
  filter(p.adjust <= 0.05, NES < 0) %>% 
  select(Description, NES, Group, comp) %>%
  group_by(comp, Group) %>% 
  group_split() %>% 
  lapply(dplyr::slice, seq(5)) %>% 
  bind_rows() %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = comp)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for down-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))

p1$data %>% 
  write.table("source_data/F4b.tsv", sep = "\t", dec = ".", row.names = F)

Figure 4c

cao_msa <- qread("cao_oligo_msa.qs", nthreads = 10)
cao_msa$plot.theme <- theme_bw()
cao_pd <- qread("cao_oligo_pd.qs", nthreads = 10)
cao_pd$plot.theme <- theme_bw()
cao_dis <- qread("cao_oligo_dis.qs", nthreads = 10)
cao_dis$plot.theme <- theme_bw()

df.all <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>% 
  lget("df.all") %>% 
  bind_rows() %>% 
  mutate(Group = paste0("OL_", strsplit(Group, "_") %>% sget(2) %>% gsub("SCGZ", "SGCZ", .)))

# Select relevant pathways
p.ids <- c("GO:0071345", "GO:0045596", "GO:0060271", "GO:0048709", "GO:0006986", "GO:0016032", "GO:0060271", "GO:0042026", "GO:0006986", "GO:0120034", "GO:0071346", "GO:1904262", "GO:0042776", "GO:0061077", "GO:0050821", "GO:0045047", "GO:0042776", "GO:0050821", "GO:0045047", "GO:0007042", "GO:0042776", "GO:0061077", "GO:0045047", "GO:0042776", "GO:0045047", "GO:2000249", "GO:0002474", "GO:0042026", "GO:0002040", "GO:0046330", "GO:0043542", "GO:0030036", "GO:0032981", "GO:0007042")

ct <- "OL_SLC5A11"

df.sel <- df.all %>% 
  filter(Group == ct)

p1 <- ggplot(df.sel, aes(NES, logp, col = comp, shape = comp)) +
  geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
  geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.8) +
  theme_bw() +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  scale_color_manual(values = pal.major) +
  theme(line = element_blank()) +
  labs(y = "-log10(adj. p)", shape = "Comparison", col = "Comparison") +
  dotSize(3)

df <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>% 
  lget("df") %>% 
  bind_rows() %>% 
  mutate(Group = paste0("OL_", strsplit(Group, "_") %>% sget(2) %>% gsub("SCGZ", "SGCZ", .))) %>% 
  filter(Group == ct,
         ID %in% p.ids)

p2 <- df %>% 
  filter(p.adjust <= 0.05, NES > 0) %>% 
  mutate(Group = factor(Group, levels = sort(unique(Group)))) %>% 
  select(Description, NES, Group, comp) %>%
  group_by(comp, Group) %>% 
  group_split() %>% 
  lapply(dplyr::slice, seq(5)) %>%
  bind_rows() %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = comp)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for up-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

p3 <- df %>% 
  filter(p.adjust <= 0.05, NES < 0) %>% 
  select(Description, NES, Group, comp) %>%
  group_by(comp, Group) %>% 
  group_split() %>% 
  lapply(dplyr::slice, seq(5)) %>% 
  bind_rows() %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = comp)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for down-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))

Export source data

p1$data %>% 
  write.table("source_data/F4c.tsv", sep = "\t", dec = ".", row.names = F)

Figure 4d,e

cm.merged <- con$getJointCountMatrix(raw = T) %>% 
  Matrix::t() %>% 
  .[, colnames(.) %in% names(anno.glia)]

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.glia %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(), 
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo %<>% 
  {. + 1} %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_") %>% 
                                   sget(1) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

Figure 4d

# For DE between visits
genes <- "TLR1"

idx <- cm.pseudo %>% 
  colnames() %>% 
  data.frame(id = .) %>% 
  mutate(condition = strsplit(id, "_|!!") %>% sget(1),
         ct = strsplit(id, "!!") %>% sget(2)) %>% 
  mutate(ord = order(condition, ct))

x <- cm.pseudo %>% 
  .[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
  .[idx$ord]

plot.dat <- x %>% 
  {data.frame(sample = names(.), 
              value = unname(.))} %>%
  mutate(anno = strsplit(sample, "!!") %>% 
           sget(2),
         condition = strsplit(sample, "!!|_") %>% 
           sget(1)) %>% 
  filter(anno %in% c("OL_LINC01608", "OL_SGCZ", "OL_SLC5A11")) %>% 
  mutate(anno = factor(anno))

stat.test <- plot.dat %>% 
  group_by(anno) %>% 
  rstatix::dunn_test(value ~ condition) %>% 
  rstatix::add_xy_position(x = "anno", step.increase = 0.1, fun = "mean_sd", scales = "free") %>% 
  mutate(p.adj = formatC(p.adj, digits = 2))

plot.dat %>% 
  ggplot(aes(anno, value)) +
  geom_boxplot(aes(fill = condition)) +
  theme_bw() +
  theme(line = element_blank(),
        axis.ticks.x = element_line(),
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  labs(x = "", y = "Normalized pseudobulk expression", fill = "", title = "TLR1 expression") +
  scale_fill_manual(values = pal.major) +
  stat_compare_means(aes(fill = condition), method = "kruskal", label = "p.format", label.y = 8.5e2) +
  stat_pvalue_manual(stat.test, label = "p.adj", hide.ns = T) -> p
## Warning in stat_compare_means(aes(fill = condition), method = "kruskal", :
## Ignoring unknown aesthetics: fill
p

p$data %>% 
  write.table("source_data/F4d.tsv", sep = "\t", dec = ".", row.names = F)

Figure 4e

# For DE between visits
genes <- "LRP1"

idx <- cm.pseudo %>% 
  colnames() %>% 
  data.frame(id = .) %>% 
  mutate(condition = strsplit(id, "_|!!") %>% sget(1),
         ct = strsplit(id, "!!") %>% sget(2)) %>% 
  mutate(ord = order(condition, ct))

x <- cm.pseudo %>% 
  .[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
  .[idx$ord]

plot.dat <- x %>% 
  {data.frame(sample = names(.), 
              value = unname(.))} %>%
  mutate(anno = strsplit(sample, "!!") %>% 
           sget(2),
         condition = strsplit(sample, "!!|_") %>% 
           sget(1)) %>% 
  filter(anno %in% c("OL_LINC01608", "OL_SGCZ", "OL_SLC5A11")) %>% 
  mutate(anno = factor(anno))

stat.test <- plot.dat %>% 
  group_by(anno) %>% 
  rstatix::wilcox_test(value ~ condition) %>% 
  rstatix::add_xy_position(x = "anno", step.increase = 0.05) %>% 
  mutate(p.adj = formatC(p.adj, digits = 2))

plot.dat %>% 
  ggplot(aes(anno, value)) +
  geom_boxplot(aes(fill = condition)) +
  theme_bw() +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        axis.ticks.x = element_line()) +
  labs(x = "", y = "Normalized pseudobulk expression", fill = "", title = "LRP1 expression") +
  scale_fill_manual(values = pal.major) +
  stat_compare_means(aes(fill = condition), method = "kruskal", label = "p.format", label.y = 175) +
  stat_pvalue_manual(stat.test, label = "p.adj", hide.ns = T) +
  ylim(c(0, 180)) -> p
## Warning in stat_compare_means(aes(fill = condition), method = "kruskal", :
## Ignoring unknown aesthetics: fill
p

Export source data

p$data %>% 
  write.table("source_data/F4e.tsv", sep = "\t", dec = ".", row.names = F)

Figure 4f

For calculation of data, see Liana.ipynb.

dat <- read.delim("liana_res.csv",
                  sep = ",",
                  header = T) %>%
  mutate(group = strsplit(sample, "_") %>%
           sget(1))

dat.plot <- dat %>% 
  dplyr::rename(ligand = ligand_complex,
                receptor = receptor_complex,
                score = lrscore) %>%
  filter(target == "Oligodendrocytes",
         ligand %in% c("BMP1", "SERPINE2", "PSAP", "APOE", "APP", "SERPING1"),
         receptor %in% c("LRP1", "BMPR1A", "LRP4")) %>%
  group_by(group, ligand, receptor, source, target) %>% 
  summarize(score = mean(score)) %>% 
  ungroup() %>% 
  arrange(group, source, target, ligand, receptor)
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by group, ligand, receptor, source, and
##   target.
## ℹ Output is grouped by group, ligand, receptor, and source.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(group, ligand, receptor, source, target))` for
##   per-operation grouping (`?dplyr::dplyr_by`) instead.
dat.msa <- dat.plot %>% 
  filter(group == "MSA",
         score > 0.39) %>% 
  mutate(lrst = paste0(ligand, receptor, source, target))

dat.pd <- dat.plot %>% 
  mutate(lrst = paste0(ligand, receptor, source, target)) %>% 
  filter(group == "PD",
         lrst %in% dat.msa$lrst)

dat.rel <- dat.msa %>% 
  mutate(score = score - dat.pd$score)

dat.rel %>% 
  lianaCircos()

dat.rel %>% 
  write.table("source_data/F4f.tsv", sep = "\t", dec = ".", row.names = F)

Figure 5 & EDF14

Load data

cao <- qread("cao_micro_pvm.qs", nthreads = 10)
cao$plot.theme <- theme_bw()
cao_msa <- qread("cao_micro_pvm_msa.qs", nthreads = 10)
cao_msa$plot.theme <- theme_bw()
cao_pd <- qread("cao_micro_pvm_pd.qs", nthreads = 10)
cao_pd$plot.theme <- theme_bw()
cao_dis <- qread("cao_micro_pvm_dis.qs", nthreads = 10)
cao_dis$plot.theme <- theme_bw()
sample.groups <- cao$sample.groups

Figure 5a

cao$plotEmbedding(groups = anno.micro,
                  size = 0.5, 
                  palette = pal.major, 
                  font.size = 3, 
                  raster = T, 
                  mark.groups = T,
                  plot.na = F,
                  alpha = 0.2) + 
  labs(x="UMAP1", y= "UMAP2") +
  theme(line = element_blank()) -> p

p

p$data %>% 
  write.table("source_data/F5a.tsv", sep = "\t", dec = ".", row.names = F)

Figure 5b

cm.merged <- cao$data.object$getJointCountMatrix()

markers <- c("AIF1","F13A1","MRC1","CD163","CD74")

dotPlot(markers, 
        cm.merged, 
        cao$cell.groups, 
        cols = c("white","firebrick")) -> p

p

Export source data

p$data %>% 
  write.table("source_data/F5b.tsv", sep = "\t", dec = ".", row.names = F)

Figure 5c-e

anno.sort <- anno.micro[rownames(cao$data.object$embedding)]
anno.sel <- anno.sort[!anno.sort %in% c("MIC_intermediate1", "PVMs")] %>% factor()
anno.sel2 <- anno.sort[!anno.sort %in% c("MIC_intermediate2", "MIC_activated")] %>% factor()

ldata1 <- getTscanTrajectory(cao$data.object, anno.sel)
ldata2 <- getTscanTrajectory(cao$data.object, anno.sel2)

ldata <- rbind(ldata1, ldata2)

Figure 5c

cao$data.object$embedding %>% 
  as.data.frame() %>% 
  mutate(., unname(anno.micro[rownames(.)])) %>% 
  setNames(c("UMAP1", "UMAP2", "annotation")) %>% 
  ggplot() + 
  geom_point(aes(UMAP1, UMAP2, col = annotation), size = 0.3) +
  geom_line(data = ldata, mapping=aes(UMAP1, UMAP2, group = edge), linewidth = 1) +
  theme_bw() + 
  theme(legend.position = "right",
        line = element_blank()) + 
  labs(col = "", x = "UMAP1", y = "UMAP2") +
  scale_colour_manual(values = pal.major) +
  dotSize(3) -> p

p

Export source data

n <- nrow(p$data)
cbind(p$data, p@layers$geom_line$data[seq_len(n), ]) %>% 
  write.table("source_data/F5c.tsv", sep = "\t", dec = ".", row.names = F)

Figures 5d,e & EDF14

Prepare data

emb <- cao$data.object$embedding %>% 
  `colnames<-`(c("UMAP1","UMAP2")) %>% 
  .[rownames(.) %in% names(anno.sel2), ]

sds_obj <- slingshot(emb, 
                     anno.sel2, 
                     start.clus = "MIC_steady-state",
                     stretch = 0
)

sds <- as.SlingshotDataSet(sds_obj)

pseudotime <- sds_obj@assays@data@listData$pseudotime[, 1]

Here, we show the code for running the generalized additive mixed model. It takes a significant amount of time to run, we provide data in pseudotime_micro_l2.qs.

mat <- cao$data.object$getJointCountMatrix() %>%
  .[match(names(sds_obj), rownames(.)), colSums(.) > 2e2] %>% 
  .[, !grepl(pattern = "RPL|RPS|MT-", colnames(.))]

mt <- mat %>%
  as.matrix() %>%
  as.data.frame() %>%
  tibble::rownames_to_column() %>%
  {message("Mutating"); mutate(.,
                               pseudotime = pseudotime[rowname],
                               condition = cao$data.object$getDatasetPerCell()[rowname] %>%
                                 as.character() %>%
                                 strsplit("_") %>%
                                 sget(1),
                               sample = strsplit(rowname, "!!") %>%
                                 sget(1))} %>%
  .[complete.cases(.), ] %>%
  {message("Melting"); reshape2::melt(., id.vars = c("rowname", "pseudotime", "condition", "sample"))} %$%
  {message("Splitting"); split(., variable)}
res <- mt %>% 
  sccore::plapply(\(x) {
    fit_full <- gamm4(data = x, formula = value ~ s(pseudotime, by = factor(condition)), random = ~(1 | sample), REML = F)
    fit_reduced <- gamm4(data = x, formula = value ~ s(pseudotime), random = ~(1 | sample), REML = F)
    fit_none <- gamm4(data = x, formula = value ~ 1, random = ~(1 | sample), REML = F)
    
    ann <- anova(fit_full$mer, fit_reduced$mer, fit_none$mer)
    
    if (any(ann$`Pr(>Chisq)` <= 0.05, na.rm = T)) {
      residuals <- predict(fit_full$gam, se.fit = T)$fit %>% 
        unname()
      
      r.sq <- c(summary(fit_full$gam)$r.sq, summary(fit_reduced$gam)$r.sq) %>% 
        setNames(c("full", "reduced"))
      
      out <- list(anova = ann,
                  residuals = residuals,
                  r.sq = r.sq)
      
      return(out)
    }
  }, n.cores = 40, mc.preschedule = T, mc.cleanup = T, progress = F) %>%
  .[!sapply(., is.null)]

qsave(res, "pseudotime_micro_l2.qs")

Figure 5d

cao$data.object$embedding %>% 
  as.data.frame() %>% 
  mutate(., unname(anno.micro[rownames(.)])) %>% 
  setNames(c("UMAP1", "UMAP2", "annotation")) %>% 
  mutate(., pseudotime = pseudotime[rownames(.)]) %>%
  ggplot() +
  geom_point(aes(UMAP1, UMAP2, col = pseudotime), size = 0.3) +
  geom_line(data = ldata2, aes(UMAP1, UMAP2, group = edge), linewidth = 1) +
  theme_bw() +
  theme(line = element_blank()) +
  scale_color_gradient(low = "navyblue", high = "orange") +
  labs(col = "Pseudotime", x = "UMAP1", y = "UMAP2") -> p

p

Export source data

n <- nrow(p$data)
cbind(p$data, p@layers$geom_line$data[seq_len(n), ]) %>%
  write.table("source_data/F5d.tsv", sep = "\t", dec = ".", row.names = F)

Figure 5e

Please note, row order may change per iteration.

res <- qread("pseudotime_micro_l2.qs")

rsq.pseudo <- res %>% 
  sapply(\(gene) gene$r.sq[1]) %>% 
  sort(decreasing = T)

res.filter <- res[names(rsq.pseudo)[seq(50)] %>% strsplit(".", fixed = T) %>% sget(1)]

cm.merged <- cao$data.object$getJointCountMatrix() %>% 
  .[match(names(pseudotime), rownames(.)), colnames(.) %in% names(res.filter)] %>% 
  .[rowSums(.) > 0, ]

## Predict smoothend expression 
pseudotime.both <- pseudotime[rownames(cm.merged)]
weights.both <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.both), rownames(.)), 1] + 1E-7

scFit <- cm.merged %>% 
  Matrix::t() %>% 
  tradeSeq::fitGAM(pseudotime = pseudotime.both, cellWeights = weights.both, verbose = T)

Smooth <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged), tidy = F, n=100)

# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))

# Seriate the results
Smooth <- Smooth[ seriation::get_order(seriation::seriate(Smooth, method="PCA_angle")), ]

# Create heatmap
col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))

Heatmap(Smooth, 
        col = col_fun,
        cluster_columns=F, 
        cluster_rows=F, 
        show_column_names = F, 
        row_names_gp = grid::gpar(fontsize = 5)) -> p

p

Export source data

p@matrix %>% 
  write.table("source_data/F5e.tsv", sep = "\t", dec = ".", row.names = F)
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   13803240   737.2   39451381  2107.0   39451381  2107.0
## Vcells 1572541131 11997.6 5833800118 44508.4 9054649287 69081.5

Extended Data Figure 14

We provide this figure here since all data are already loaded.

cpc <- pseudotime %>% 
  names() %>% 
  strsplit("_") %>% 
  sget(1) %>% 
  factor()

res[rownames(Smooth)] %>% 
  lget("residuals") %>% 
  lapply(as.data.frame) %>% 
  lapply(setNames, "residuals") %>% 
  lapply(mutate, group = cpc, pseudotime = pseudotime) %>% 
  data.table::rbindlist(idcol = "gene") %>% 
  ggplot(aes(pseudotime, residuals, col = group)) +
  geom_smooth() +
  theme_bw() +
  facet_wrap(~gene, ncol = 5, scales = "free") -> p

p
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Figure 5f

cao_msa$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) +
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none") +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nCTRL",0,"1\nMSA")) +
  geom_vline(xintercept = 4.5, col = "red") +
  labs(x = "", y = "") +
  theme(line = element_blank()) -> p

p

p$data %>% 
  write.table("source_data/F5f.tsv", sep = "\t", dec = ".", row.names = F)

Figure 5g

cao_pd$plotCellLoadings(show.pvals = F, 
                        alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none") +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1.1),
                     labels = c("-1\nCTRL",0,"1\nPD")) +
  geom_vline(xintercept = 4.5, col = "red") +
  labs(x = "", y = "") +
  theme(line = element_blank()) -> p

p

Export source data

p$data %>% 
  write.table("source_data/F5g.tsv", sep = "\t", dec = ".", row.names = F)

Figure 5h

cao_dis$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none") +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1.2,1),
                     labels = c("-1\nPD",0,"1\nMSA")) +
  geom_vline(xintercept = 3.5, col = "red") +
  labs(x = "", y = "") +
  theme(line = element_blank()) -> p

p

p$data %>% 
  write.table("source_data/F5h.tsv", sep = "\t", dec = ".", row.names = F)
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   13358041   713.4   39451381  2107.0   39451381  2107.0
## Vcells 1427337638 10889.8 4667040095 35606.7 9054649287 69081.5

Figure 5j

We provide combined data from the RNAscope experiments as a single file res.qs.

res <- qread("RNAscope.qs") %>% 
  filter(target == "AIF1", Ch1NumSpots > 0) %>% 
  arrange(file)

area <- read.table("RNAscope_areas.tsv", sep = "\t", header = T) %>% # Million pixels
  mutate(file = paste(File.name, Tissue, sep = "")) %>% 
  filter(file %in% res$file) %>% 
  arrange(file) %>% 
  mutate(rel_px = px.2 / 1E6)
p1 <- res %>% 
  group_by(group, target, file) %>% 
  summarize(spots = mean(Ch1NumSpots)) %>% 
  ggplot(aes(group, spots, fill = group)) + 
  geom_boxplot() +
  geom_jitter(width = 0.2) +
  theme_bw() +
  labs(y = "Mean no. spots per double-positive cell", x = "") +
  stat_compare_means(method = "kruskal.test", label.y = 115) +
  stat_compare_means(comparisons = comp, method = "wilcox.test") +
  theme(line = element_blank()) +
  guides(fill = "none") +
  scale_fill_manual(values = pal.major)
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by group, target, and file.
## ℹ Output is grouped by group and target.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(group, target, file))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.
p2 <- res %>% 
  group_by(group, target, file) %>% 
  summarize(no_cells = n()) %>% 
  as.data.frame() %>% 
  arrange(file) %>% 
  mutate(rel_cells = no_cells / area$rel_px) %>% 
  ggplot(aes(group, rel_cells, fill = group)) + 
  geom_boxplot(outliers = F) +
  geom_jitter(width = 0.2) +
  theme_bw() +
  stat_compare_means(method = "kruskal.test", label.y = 17) +
  stat_compare_means(comparisons = comp, method = "wilcox.test") +
  labs(y = "No. double-positive cells\nNormalized to area", x = "") +
  theme(line = element_blank()) +
  guides(fill = "none") +
  scale_fill_manual(values = pal.major)
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by group, target, and file.
## ℹ Output is grouped by group and target.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(group, target, file))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.
plot_grid(plotlist = list(p1, p2), ncol = 1)

Export source data

p1$data %>% 
  write.table("source_data/F5j1.tsv", sep = "\t", dec = ".", row.names = F)

p2$data %>% 
  write.table("source_data/F5j2.tsv", sep = "\t", dec = ".", row.names = F)

Figure 5k

fams <- cao_msa$test.results[["GSEA"]]$families

df.all <- list(cao_msa$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "up", p.adj = 1, q.value = 1),
               cao_msa$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "down", p.adj = 1, q.value = 1)) %>% 
  bind_rows() %>% 
  mutate(logp = -log10(p.adjust))

# Relevant pathways
p.ids <- c("GO:0060760", "GO:0002230", "GO:0019915", "GO:1904646", "GO:0042775", "GO:0019646", "GO:0033108", "GO:0035455", "GO:0061077", "GO:0006914", "GO:0042026", "GO:0034340", "GO:0051607", "GO:0060337", "GO:0035455", "GO:0002684", "GO:0036037", "GO:0048514")

df <- cacoa:::getOntologyFamilyChildren(df.all, fams=fams, type = "GSEA", subtype = "BP") %>% 
  mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .)) %>% 
  filter(ID %in% p.ids)

df.all %<>% mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .))

p1 <- ggplot(df.all, aes(NES, logp, col = Group, shape = Group)) +
  geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
  geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.5) +
  theme_bw() +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  scale_color_manual(values = pal.major) +
  theme(line = element_blank()) +
  labs(y = "-log10(adj. p)") +
  dotSize(3)

p2 <- df %>% 
  filter(p.adjust <= 0.05, NES > 0) %>% 
  mutate(Group = factor(Group, levels = sort(unique(Group)))) %>% 
  select(Description, NES, Group) %$% 
  split(., Group) %>% 
  lapply(dplyr::slice, seq(5)) %>% 
  bind_rows() %>% 
  dplyr::slice(seq(25)) %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = Group)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for up-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

p3 <- df %>% 
  filter(p.adjust <= 0.05, NES < 0) %>% 
  select(Description, NES, Group) %>%
  dplyr::slice(seq(20)) %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = Group)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for down-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))

p1$data %>% 
  write.table("source_data/F5k.tsv", sep = "\t", dec = ".", row.names = F)
##              used   (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   12823655  684.9   39451381  2107.0   39451381  2107.0
## Vcells 1283500184 9792.4 3733632076 28485.4 9054649287 69081.5

Figure 5l

fams <- cao_pd$test.results[["GSEA"]]$families

df.all <- list(cao_pd$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "up", p.adj = 1, q.value = 1),
               cao_pd$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "down", p.adj = 1, q.value = 1)) %>% 
  bind_rows() %>% 
  mutate(logp = -log10(p.adjust))

# Select relevant pathways
p.ids <- c("GO:0042776", "GO:0034341", "GO:0001916", "GO:0002503", "GO:0019885", "GO:0016042", "GO:0001909", "GO:0051085", "GO:0045824", "GO:0044000", "GO:0019885", "GO:0044242", "GO:0001944", "GO:0034976", "GO:0019646", "GO:0009205", "GO:0019883", "GO:0070972", "GO:0002474")

df <- cacoa:::getOntologyFamilyChildren(df.all, fams=fams, type = "GSEA", subtype = "BP") %>% 
  mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .)) %>% 
  filter(ID %in% p.ids)

df.all %<>% mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .))

p1 <- ggplot(df.all, aes(NES, logp, col = Group, shape = Group)) +
  geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
  geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.5) +
  theme_bw() +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  scale_color_manual(values = pal.major) +
  theme(line = element_blank()) +
  labs(y = "-log10(adj. p)") +
  dotSize(3)

p2 <- df %>% 
  filter(p.adjust <= 0.05, NES > 0) %>% 
  select(Description, NES, Group) %$% 
  split(., Group) %>% 
  lapply(dplyr::slice, seq(5)) %>% 
  bind_rows() %>% 
  dplyr::slice(seq(25)) %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = Group)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for up-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

p3 <- df %>% 
  filter(p.adjust <= 0.05, NES < 0) %>% 
  select(Description, NES, Group) %$% 
  split(., Group) %>% 
  lapply(dplyr::slice, seq(5)) %>% 
  bind_rows() %>% 
  dplyr::slice(seq(25)) %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = Group)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for down-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))

Export source data

p1$data %>% 
  write.table("source_data/F5l.tsv", sep = "\t", dec = ".", row.names = F)
##              used (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   12188442  651   39451381  2107.0   39451381  2107.0
## Vcells 1081334605 8250 2986905661 22788.3 9054649287 69081.5

Figure 6

Figures 6c,e,f where made with GraphPad Prism. Data are available in Phagocytosis_assay_triplicates_raw_data_HH.tsv (Copenhagen experiment) or HOUSTON.tsv (Houston experiment).

Figure 6b

dat <- read.delim("Phagocytosis_assay_triplicates_raw_data_HH.tsv", 
                  header = T, dec = ",") %>% 
  tidyr::pivot_longer(names_to = "group", cols = -1, values_to = "value") %>% 
  mutate(group = gsub(".1|.2", "", group) %>% factor(labels = c("PBS","LPS","MSA CSF","CTRL CSF","PD CSF"))) %>% 
  mutate(group = factor(group, levels = c("PBS","LPS","CTRL CSF","MSA CSF","PD CSF"))) %>% 
  group_by(Time, group) %>% 
  summarize(mean = mean(value, na.rm = T), 
            sd = sd(value, na.rm = T))
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by Time and group.
## ℹ Output is grouped by Time.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(Time, group))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.
dat %>% 
  ggplot(aes(Time, mean, group=group, color=group)) + 
  geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.1) +
  geom_point(size = 0.5) +
  theme_bw() +
  labs(x = "Time (hours)", y = "pHrodo-labelled E. coli uptake - intensity ratio [AU]", col = "Stimulation") +
  scale_color_manual(values = pal.major) +
  theme(line = element_blank()) -> p

p

p$data %>% 
  write.table("source_data/F6b.tsv", sep = "\t", dec = ".", row.names = F)

Figure 6d

dat.raw <- read.table("Cytokine_summary.tsv", header = TRUE, sep = "\t", dec = ",")

dat.tmp <- dat.raw %>%
  melt(id.vars = c("Sample","Group","Condition")) %>% 
  mutate(variable = variable %>% 
           as.character() %>% 
           gsub(".", "-", ., fixed = T) %>% 
           as.factor()) %>%
  filter(Condition == "Media", !Group %in% c("PBS","LPS")) %>%
  filter(Condition == "Media") %>%
  mutate(Group = Group %>% factor(levels = c("CTRL","MSA","PD")),
         variable = variable %>% factor())

dat.stat <- dat.raw %>% 
  select(!IL.8) %>% 
  mutate(Group = factor(Group, levels = c("PBS","LPS","CTRL","MSA","PD"))) %>% 
  filter(!Group %in% c("PBS","LPS"))

res.il10 <- FSA::dunnTest(IL10 ~ Group, data = dat.stat %>% {`colnames<-`(., gsub(".", "", colnames(.), fixed = T))}, method = "bh")
## Registered S3 methods overwritten by 'FSA':
##   method       from
##   confint.boot car 
##   hist.boot    car
stat.test <- dat.tmp %>% 
  filter(variable == "IL-10") %>% 
  select(Group, value) %>% 
  as.data.frame() %>%
  rstatix::wilcox_test(value ~ Group) %>% 
  mutate(p = res.il10$res$P.unadj,
         p.adj = res.il10$res$P.adj %>% formatC(digits = 2),
         p.adj.signif = c("*","ns","ns")) %>% 
  rstatix::add_xy_position(x = "Group")

dat.tmp %>% 
  filter(variable == "IL-10") %>%
  ggplot(aes(Group, value)) + 
  geom_point(aes(fill = Group), size = 3, position = position_jitter(width = 0.3), pch = 21, color = "black") + 
  theme_bw() +
  theme(line = element_blank()) +
  stat_pvalue_manual(stat.test, label = "p.adj", tip.length = 0.01, backet.nudge.y = 3, hide.ns = F) +
  scale_fill_manual(values = pal.major) +
  labs(x = "", y = "IL-10 pg/mL") +
  guides(fil = "none") -> p

p

Export source data

p$data %>% 
  write.table("source_data/F6d.tsv", sep = "\t", dec = ".", row.names = F)

Figures 6g-i

dat.melt <- read.table("Microglia+CSF_samples_aSyn_sCD163.tsv", header = T, sep = "\t", dec = ",") %>%  
  melt(id.vars = c("diagnosis", "sex")) %>% 
  mutate(diagnosis = factor(diagnosis, levels = c("CTRL","MSA","IPD"), labels = c("CTRL","MSA","PD")))

Figure 6g

dat.melt %>% 
  filter(variable == "sCD163_ng.mL_CSF") %>% 
  ggplot(aes(diagnosis, value)) +
  geom_boxplot(aes(fill = diagnosis)) +
  theme_bw() + 
  labs(x = "", y = "sCD163 ng/ml", fill = "Diagnosis") +
  scale_fill_manual(values = pal.major) +
  stat_compare_means(method = "t.test", comparisons = list(c("CTRL","MSA"), c("MSA","PD"), c("CTRL","PD"))) +
  guides(fill = "none") +
  theme(line = element_blank()) -> p

p

Export source data

p$data %>% 
  write.table("source_data/F6g.tsv", sep = "\t", dec = ".", row.names = F)

Figure 6h

dat.melt %>% 
  filter(variable == "aSyn_pg.mL") %>% 
  ggplot(aes(diagnosis, value)) +
  geom_boxplot(aes(fill = diagnosis)) +
  theme_bw() + 
  labs(x = "", y = "aSyn pg/ml", fill = "Diagnosis") +
  scale_fill_manual(values = pal.major) +
  stat_compare_means(method = "t.test", comparisons = list(c("CTRL","MSA"), c("MSA","PD"), c("CTRL","PD"))) +
  guides(fill = "none") +
  theme(line = element_blank()) -> p

p
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_signif()`).

p$data %>% 
  write.table("source_data/F6h.tsv", sep = "\t", dec = ".", row.names = F)

Figure 6i

dat.melt %>% 
  filter(variable %in% c("aSyn_pg.mL","sCD163_ng.mL_CSF")) %>%
  select(-sex) %>% 
  mutate(id = rep(seq(63), 2)) %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value) %>% 
  ggplot(aes(aSyn_pg.mL, sCD163_ng.mL_CSF)) +
  geom_point(aes(fill = diagnosis), pch = 21, color = "black") +
  theme_bw() +
  labs(x = "aSyn pg/ml", y = "sCD163, ng/ml", fill = "") +
  geom_smooth(method = MASS::rlm, se = FALSE, color = "black") +
  stat_poly_eq(aes(label = paste(after_stat(rr.label), after_stat(p.value.label), sep = "*\", \"*")), color = "black") +
  scale_fill_manual(values = pal.major) +
  theme(line = element_blank()) -> p

p
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_poly_eq()`).
## Warning: Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).

Export source data

p$data %>% 
  write.table("source_data/F6i.tsv", sep = "\t", dec = ".", row.names = F)

Figure 6j

Here, we use data from Rydbirk et al. on differentially expressed proteins in the soluble fraction of CSF pools.

dat <- read.delim("Soluble_fraction.txt")

msa.dep <- dat %>% 
  filter(Disease.group == "MSA") %>% 
  pull(Gene.name)

pd.dep <- dat %>% 
  filter(Disease.group == "PD") %>% 
  pull(Gene.name)

overlap <- intersect(msa.dep, pd.dep)

highlight_proteins <- c("CTSS", "FCGR2A", "LAMP1", "CTSD", "CLTC") # From suppl. table 14, KEGG phagosome or lysosome pathways

msa.dep %<>% .[!. %in% overlap]
pd.dep %<>% .[!. %in% overlap]
overlap %<>% .[!. %in% highlight_proteins]

string_db <- STRINGdb$new(
  version = "12.0",       # or latest available
  species = 9606,         # 9606 = Homo sapiens
  score_threshold = 400,  # confidence score cutoff
  input_directory = ""
)

genes <- c(msa.dep, pd.dep, overlap, highlight_proteins) %>% unique()

mapped_genes <- string_db$map(
  data.frame(gene = genes),
  "gene",
  removeUnmappedRows = TRUE
)
## Warning:  we couldn't map to STRING 5% of your identifiers
# Get interactions
interactions <- string_db$get_interactions(mapped_genes$STRING_id)

# Convert to igraph object
ppi_graph <- graph_from_data_frame(interactions, directed = FALSE)

# Create label mapping: STRING ID to gene name
string_to_gene <- mapped_genes %>%
  select(STRING_id, gene) %>%
  distinct()

# Add labels to graph nodes
V(ppi_graph)$label <- string_to_gene$gene[match(V(ppi_graph)$name, string_to_gene$STRING_id)]

custom_colors <- c(
  "CTSS" = "firebrick",
  "FCGR2A" = "purple2",
  "LAMP1" = "purple2",
  "CTSD" = "navyblue",
  "CLTC" = "navyblue",
  setNames(rep("tomato", length(msa.dep)), msa.dep),
  setNames(rep("steelblue", length(pd.dep)), pd.dep),
  setNames(rep("orchid", length(overlap)), overlap))

# Apply color mapping to node attributes
V(ppi_graph)$node_color <- custom_colors[V(ppi_graph)$label]

# Get node labels for each edge endpoint
edge_ends <- ends(ppi_graph, es = E(ppi_graph), names = FALSE)
source_labels <- V(ppi_graph)$label[edge_ends[, 1]]
target_labels <- V(ppi_graph)$label[edge_ends[, 2]]

# Assign edge width
E(ppi_graph)$edge_width <- ifelse(
  source_labels %in% highlight_proteins & target_labels %in% highlight_proteins,
  0.5, 0.1
)

# Compute node sizes based on label
V(ppi_graph)$node_size <- sapply(V(ppi_graph)$label, \(x) if (x %in% overlap) 3 else if (x %in% highlight_proteins) 5 else 2)

V(ppi_graph)$label_type <- ifelse(
  V(ppi_graph)$label %in% highlight_proteins,
  "highlight", "other"
)

# Plot the network with size and color
ggraph(ppi_graph, layout = "fr") +
  geom_edge_link(aes(width = I(edge_width)), alpha = 0.4) +
  geom_node_point(aes(color = I(node_color), size = I(node_size)), show.legend = FALSE) +
  geom_node_label(data = function(x) { x[x$label_type == "highlight", ] },
                  aes(label = label), size = 3, label.padding = unit(0.15, "lines"), repel = TRUE, show.legend = F) +
  geom_node_text(data = function(x) { x[x$label_type == "other", ] },
                 aes(label = label), size = 2, repel = TRUE, show.legend = F) +
  theme_void()

Export source data

ppi_graph %>% 
  vertex.attributes() %>% 
  as.data.frame() %>% 
  write.table("source_data/F6j_nodes.tsv", sep = "\t", dec = ".", row.names = F)

ppi_graph %>% 
  as_edgelist() %>% 
  as.data.frame() %>% 
  setNames(c("from", "to")) %>% 
  cbind(edge.attributes(ppi_graph)) %>% 
  write.table("source_data/F6j_edges.tsv", sep = "\t", dec = ".", row.names = F)

Create source data file

Not rerun here

tsv_files <- dir("source_data", pattern = ".tsv", full.names = T)

nn <- tsv_files %>% 
  basename() %>% 
  tools::file_path_sans_ext() %>% 
  gsub("F", "Figure ", ., fixed = T)

tsv_files %>% 
  lapply(., read.delim, check.names = FALSE, sep = "\t", dec = ".") %>% 
  setNames(nn) %>% 
  writexl::write_xlsx("source_data/source_data.xlsx")

Extended Data Figure 1

Load data

samples <- anno.major %>% 
  names() %>% 
  strsplit("!!") %>% 
  sget(1) %>% 
  unique()

crm <- qread("crm.qs",
             nthreads = 10)
crm$theme <- theme_bw()

1a-f

mtp <- crm$selectMetrics(ids = c(1:4,6,19))

plot.list <- mtp %>% 
  lapply(\(met) crm$plotSummaryMetrics(metrics = met, comp.group = "sample", second.comp.group = "group") + 
           scale_fill_manual(values = pal.major) +
           labs(x = "") + 
           theme(axis.text.x = element_blank(),
                 axis.ticks.x = element_blank())) 
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
plot.list %>% 
  plot_grid(plotlist = ., labels = letters[1:6], ncol = 2)

1g

crm$plotSummaryMetrics(metrics = crm$selectMetrics(ids = 20), comp.group = "sample", second.comp.group = "group") + 
  scale_fill_manual(values = pal.major) +
  labs(x = "") + 
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "right") -> p
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
p

1h

crm$plotFilteredCells(doublet.method = "doubletdetection", depth.cutoff = 5e2, size = 0.2, alpha = 0.1) +
  dotSize(3) -> p
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p

1i

crm$plotFilteredCells(type = "bar", doublet.method = "doubletdetection", depth.cutoff = 5e2)$data %>%  
  filter(sample != "MSA_1406") %>%
  mutate(sample = strsplit(sample, "_") %>% 
           sget(1)) %$% 
  split(., sample) %>% 
  lapply(\(x) split(x, x$filter)) %>% 
  lapply(lapply, \(df) mutate(df, sample = paste0(sample, seq(nrow(df))))) %>% 
  lapply(bind_rows) %>% 
  bind_rows() %>%
  mutate(sample = factor(sample, levels = c(paste0("CTRL", seq(10)), paste0("MSA", seq(7)), paste0("PD", seq(12))))) %>% 
  ggplot(aes(sample, pct, fill = filter)) + 
  geom_bar(stat = "identity") + 
  geom_text_repel(aes(label = sprintf("%0.2f", round(pct, digits = 2))), 
                  position = position_stack(vjust = 0.5), 
                  direction = "y", 
                  size = 2.5) + 
  crm$theme + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
  labs(x = "", y = "Percentage cells filtered") +
  theme(legend.position = "bottom") -> p

p

Extended Data Figure 2

These figures cannot be reproduced here due to GDPR. Leaving the code for visibility.

Load data

crm <- qread("crm.qs", nthreads = 10)

2a

crm$plotCbCells() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

2b

crm$plotCbAmbGenes() +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        axis.ticks.x = element_line()) -> p

p
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   19187092  1024.8   39451381  2107.0   39451381  2107.0
## Vcells 3010817893 22970.8 4301320151 32816.5 9054649287 69081.5

2c

c("RBFOX3","MOG","VCAN","AQP4","CSF1R","PDGFRB","PTPRC","MRC1","GAD1","SLC17A7","PPP1R1B") %>%
  sort() %>% 
  lapply(function(g) con$plotGraph(embedding = "UMAP", gene=g, title=g, size=0.1, plot.na = F, palette = colorRampPalette(c("grey90","firebrick"))) + theme(line = element_blank())) -> plot.list

plot.list %>% 
  cowplot::plot_grid(plotlist=., ncol=2)

Extended Data Figure 3

con$embedding <- con$embeddings$UMAP

sample.groups <- con$samples %>% 
  names() %>% 
  setNames(grepl.replace(., c("CTRL","MSA","PD")), .)

cao <- Cacoa$new(data.object = con, 
                 sample.groups = ifelse(grepl("CTRL", sample.groups), "CTRL", "DISEASE") %>% 
                   setNames(names(sample.groups)), 
                 cell.groups = anno.major, 
                 ref.level = "CTRL", 
                 target.level = "DISEASE", 
                 n.cores = 32)

meta <- read.delim("metadata.tsv", sep = "\t") %>%
  filter(sample %in% names(con$samples)) %>%
  tibble::column_to_rownames("sample") %>% 
  dplyr::select(-condition)

meta %<>%
  mutate(sample = rownames(.))

spc <- con$getDatasetPerCell()

mpc <- spc %>% 
  data.frame(sample = ., cid = names(.))

for (cc in colnames(meta)) {
  mpc[[cc]] <- meta[[cc]][match(mpc$sample, meta$sample)]
}

mpc$subtype[mpc$subtype == ""] <- NA
p <- con$plotGraph(colors = setNames(mpc$age, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, title = "Age", size = 0.3, alpha = 0.3) +
  scale_color_gradient(low = "grey", high = "firebrick") +
  theme(line = element_blank()) +
  labs(col = "Years")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p

p <- con$plotGraph(colors = setNames(mpc$pmi, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, title = "PMI", size = 0.3, alpha = 0.3) +
  scale_color_gradient(low = "grey", high = "firebrick") +
  theme(line = element_blank()) +
  labs(col = "Hours")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p

p <- con$plotGraph(colors = setNames(mpc$disease_duration, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, title = "Disease duration", size = 0.3, alpha = 0.3) +
  scale_color_gradient(low = "grey", high = "firebrick") +
  theme(line = element_blank()) +
  labs(col = "Years")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p

p <- con$plotGraph(groups = setNames(mpc$sample, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Sample", size = 0.3, alpha = 0.3) +
  theme(line = element_blank()) + dotSize(3)

p

p <- con$plotGraph(groups = setNames(mpc$brain_bank, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Brain bank", size = 0.3, alpha = 0.3) +
  theme(line = element_blank()) + dotSize(3)

p

p <- con$plotGraph(groups = setNames(mpc$sex, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Sex", size = 0.3, alpha = 0.3) +
  theme(line = element_blank()) + dotSize(3)

p

p <- con$plotGraph(groups = setNames(mpc$subtype, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Subtype", size = 0.3, alpha = 0.3) +
  theme(line = element_blank()) + dotSize(3)

p

p <- con$plotGraph(groups = setNames(mpc$extract, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Extract", size = 0.3, alpha = 0.3) +
  theme(line = element_blank()) + dotSize(3)

p

p <- con$plotGraph(groups = setNames(mpc$flowcell, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Flowcell", size = 0.3, alpha = 0.3) +
  theme(line = element_blank()) + dotSize(3)

p

Extended Data Figure 4 & EDT3

Load metadata

meta <- read.delim("metadata.tsv", sep = "\t") %>%
  filter(sample %in% names(con$samples)) %>%
  tibble::column_to_rownames("sample") %>% 
  mutate(sample = rownames(.)) %>% 
  dplyr::select(-condition)

4a

con_msa <- con$clone()
con_msa$embedding <- con$embeddings$UMAP

con_msa$samples <- con$samples %>% .[!grepl("PD", names(.))]

sample.groups <- con_msa$samples %>% 
  names() %>% 
  setNames(grepl.replace(., c("CTRL","MSA")), .)

cao_msa <- Cacoa$new(data.object = con_msa, 
                     sample.groups = ifelse(grepl("CTRL", sample.groups), "CTRL", "MSA") %>% 
                       setNames(names(sample.groups)), 
                     cell.groups = anno.major, 
                     ref.level = "CTRL", 
                     target.level = "MSA", 
                     n.cores = 32)

cao_msa$estimateExpressionShiftMagnitudes()
## Filtering data...
## Excluding cell types Exc. neurons that don't have enough samples
## done!
## Calculating pairwise distances using dist='cor'...
## Done!
cao_msa$estimateMetadataSeparation(sample.meta = meta %>% filter(!grepl("PD", sample)) %>% dplyr::select(-brain_bank, -sample), 
                                   space = "expression.shifts")
plot.list <- list(
  cao_msa$plotSampleDistances(space = "expression.shifts", show.sample.size = T, method = "UMAP", title = "Condition"),
  cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$age %>% setNames(rownames(meta)), title = "Age"),
  cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$disease_duration %>% setNames(rownames(meta)), show.sample.size = T, title = "Disease duration"),
  cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$extract %>% setNames(rownames(meta)), show.sample.size = T, title = "Extract batch"),
  cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$flowcell %>% setNames(rownames(meta)), show.sample.size = T, title = "Flowcell"),
  cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$pmi %>% setNames(rownames(meta)), title = "PMI"),
  cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$sex %>% setNames(rownames(meta)), show.sample.size = T, title = "Sex"),
  cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$subtype %>% setNames(rownames(meta)), show.sample.size = T, title = "Subtype"),
  cao_msa$test.results$metadata.separation$padjust %>%
    data.frame(p = .) %>%
    mutate(pp = -log10(p), padj = formatC(p, digits = 3)) %>%
    mutate(padj = sapply(padj, \(x) if (x > 0.05) "" else paste("p.adj = ", x, sep = ""))) %>% 
    tibble::rownames_to_column("covariate") %>%
    ggplot(aes(covariate, pp)) +
    geom_bar(stat = "identity", fill = "lightblue4") +
    ylim(0, pmax(2, max(-log10(cao_msa$test.results$metadata.separation$padjust)))) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
    labs(x = "", y = "-log10(adj. p)", title = "Association significance") +
    geom_hline(yintercept = -log10(0.05), colour = "red3") +
    geom_text(aes(label = padj, y = pp + 0.1), size = 8)
)
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
cowplot::plot_grid(plotlist = plot.list, ncol = 5) -> p

p

4b

con_pd <- con$clone()
con_pd$embedding <- con$embeddings$UMAP

con_pd$samples <- con$samples %>% .[!grepl("MSA", names(.))]

sample.groups <- con_pd$samples %>% 
  names() %>% 
  setNames(grepl.replace(., c("CTRL","PD")), .)

cao_pd <- Cacoa$new(data.object = con_pd, 
                    sample.groups = ifelse(grepl("CTRL", sample.groups), "CTRL", "PD") %>% 
                      setNames(names(sample.groups)), 
                    cell.groups = anno.major, 
                    ref.level = "CTRL", 
                    target.level = "PD", 
                    n.cores = 32)

cao_pd$estimateExpressionShiftMagnitudes()
## Filtering data...
## Excluding cell types Exc. neurons that don't have enough samples
## done!
## Calculating pairwise distances using dist='cor'...
## Done!
cao_pd$estimateMetadataSeparation(sample.meta = meta %>% filter(!grepl("MSA", sample)) %>% dplyr::select(-disease_duration, -subtype, -sample), 
                                  space = "expression.shifts")
plot.list <- list(
  cao_pd$plotSampleDistances(space = "expression.shifts", show.sample.size = T, method = "UMAP", title = "Condition"),
  cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$age %>% setNames(rownames(meta)), title = "Age"),
  cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$brain_bank %>% setNames(rownames(meta)), show.sample.size = T, title = "Brain bank"),
  cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$extract %>% setNames(rownames(meta)), show.sample.size = T, title = "Extract batch"),
  cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$flowcell %>% setNames(rownames(meta)), show.sample.size = T, title = "Flowcell"),
  cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$pmi %>% setNames(rownames(meta)), title = "PMI"),
  cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$sex %>% setNames(rownames(meta)), show.sample.size = T, title = "Sex"),
  cao_pd$test.results$metadata.separation$padjust %>%
    data.frame(p = .) %>%
    mutate(pp = -log10(p), padj = formatC(p, digits = 3)) %>%
    mutate(padj = sapply(padj, \(x) if (x > 0.05) "" else paste("p.adj = ", x, sep = ""))) %>% 
    tibble::rownames_to_column("covariate") %>%
    ggplot(aes(covariate, pp)) +
    geom_bar(stat = "identity", fill = "lightblue4") +
    ylim(0, pmax(2, max(-log10(cao_pd$test.results$metadata.separation$padjust)))) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
    labs(x = "", y = "-log10(adj. p)", title = "Association significance") +
    geom_hline(yintercept = -log10(0.05), colour = "red3") +
    geom_text(aes(label = padj, y = pp + 0.1), size = 3)
)
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
cowplot::plot_grid(plotlist = plot.list, ncol = 4) -> p

p

4c

con_dis <- con$clone()
con_dis$embedding <- con$embeddings$UMAP

con_dis$samples <- con$samples %>% .[!grepl("CTRL", names(.))]

sample.groups <- con_dis$samples %>% 
  names() %>% 
  setNames(grepl.replace(., c("PD","MSA")), .)

cao_dis <- Cacoa$new(data.object = con_dis, 
                     sample.groups = ifelse(grepl("PD", sample.groups), "PD", "MSA") %>% 
                       setNames(names(sample.groups)), 
                     cell.groups = anno.major, 
                     ref.level = "PD", 
                     target.level = "MSA", 
                     n.cores = 32)

cao_dis$estimateExpressionShiftMagnitudes()
## Filtering data...
## Excluding cell types Exc. neurons that don't have enough samples
## done!
## Calculating pairwise distances using dist='cor'...
## Done!
cao_dis$estimateMetadataSeparation(sample.meta = meta %>% filter(!grepl("CTRL", sample)) %>% dplyr::select(-sample), 
                                   space = "expression.shifts")
plot.list <- list(
  cao_dis$plotSampleDistances(space = "expression.shifts", show.sample.size = T, method = "UMAP", title = "Condition"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$age %>% setNames(rownames(meta)), title = "Age"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$brain_bank %>% setNames(rownames(meta)), show.sample.size = T, title = "Brain bank"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$disease_duration %>% setNames(rownames(meta)), show.sample.size = T, title = "Disease duration"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$extract %>% setNames(rownames(meta)), show.sample.size = T, title = "Extract batch"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$flowcell %>% setNames(rownames(meta)), show.sample.size = T, title = "Flowcell"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$pmi %>% setNames(rownames(meta)), title = "PMI"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$sex %>% setNames(rownames(meta)), show.sample.size = T, title = "Sex"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$subtype %>% setNames(rownames(meta)), show.sample.size = T, title = "Subtype"),
  cao_dis$test.results$metadata.separation$padjust %>%
    data.frame(p = .) %>%
    mutate(pp = -log10(p), padj = formatC(p, digits = 3)) %>%
    mutate(padj = sapply(padj, \(x) if (x > 0.05) "" else paste("p.adj = ", x, sep = ""))) %>% 
    tibble::rownames_to_column("covariate") %>%
    ggplot(aes(covariate, pp)) +
    geom_bar(stat = "identity", fill = "lightblue4") +
    ylim(0, pmax(2, max(-log10(cao_pd$test.results$metadata.separation$padjust)))) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
    labs(x = "", y = "-log10(adj. p)", title = "Association significance") +
    geom_hline(yintercept = -log10(0.05), colour = "red3") +
    geom_text(aes(label = padj, y = pp + 0.1), size = 8)
)
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
cowplot::plot_grid(plotlist = plot.list, ncol = 5) -> p

p

Extended Data Table 3

Not rerun

list(cao_msa, cao_pd, cao_dis) %>% 
  lapply(getElement, "test.results") %>% 
  lget("metadata.separation") %>%
  lapply(\(x) x[-1]) %>%
  Map(\(dat, nn) bind_cols(dat) %>% data.frame() %>%  `rownames<-`(nn), dat = ., nn = lapply(., lapply, names) %>% sget(1)) %>% 
  setNames(c("MSA", "PD", "dis")) %>% 
  lapply(tibble::rownames_to_column, var = "covariate") %>% 
  bind_rows(.id = "comparison") %>% 
  mutate(comparison = scHelper::grepl.replace(comparison, c("MSA", "PD", "dis"), c("CTRL vs MSA", "CTRL vs PD", "PD vs MSA"))) %>% 
  write.table("Table SX - Cacoa covariate analysis.tsv", sep = "\t", dec = ".", row.names = F)

Extended Data Figure 5

To investigate effects from co-variates, we ran scITD on all cell types for each comparison. We don’t determine factors here as it takes a significant amount of time, but we keep the code in the first instance.

We tested different levels of var_scale_power, here we’re just showing var_scale_power = 0.5 as we didn’t find any effects except for OPCs. Per default, we set vargenes_method=“norm_var_pvals” and adjusted vargenes_thresh to obtain around 1,000 var. genes, but in some instances we took the top most variable genes instead.

Neurons, preparation

con.tmp <- qread("cao_neurons.qs", nthreads = 10)$data.object
anno <- qread("anno_neurons.qs")

anno %<>% 
  collapseAnnotation("GABA") %>%
  collapseAnnotation("GLU") %>%
  renameAnnotation("GABA", "Inh. neurons") %>%
  renameAnnotation("GLU", "Exc. neurons") %>%
  collapseAnnotation("MSN")
## Collapsing 11 labels containing 'GABA' in their name into one label.
## Collapsing 2 labels containing 'GLU' in their name into one label.
## Collapsing 3 labels containing 'MSN' in their name into one label.
cm.raw.all <- con.tmp$getJointCountMatrix(raw = T) %>% 
  Matrix::t() %>% 
  .[!grepl("MT-|RPS|RPL", rownames(.)), ]

meta.all <- con.tmp$getDatasetPerCell() %>% 
  data.frame(donors = .) %>% 
  mutate(ctypes = anno[rownames(.)] %>% unname()) %>% 
  .[complete.cases(.), ]

metadata.all <- read.delim("metadata.tsv")

for (cc in colnames(metadata.all)[-1]) {
  meta.all[[cc]] <- metadata.all[[cc]][match(meta.all$donors, metadata.all$sample)]
}

# Convert integers to numeric
meta.all$age %<>% as.numeric()
meta.all$disease_duration %<>% as.numeric()

CTRL vs MSA

meta <- meta.all %>% 
  filter(condition != "PD") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "PD")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw %>% .[, colnames(.) %in% rownames(meta)], 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw %>% .[, colnames(.) %in% :
## Cell type names cannot have special characters '.', '_', or ':'. Removing these
## characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=1,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=0.7,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 10 donors. All donors have at least 1 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1179
# get assistance with rank determination
itd_container %<>% determine_ranks_tucker(max_ranks_test=c(10,15),
                                          shuffle_level='cells',
                                          num_iter=10,
                                          norm_method='trim',
                                          scale_factor=10000,
                                          scale_var=TRUE,
                                          var_scale_power=0.5)

itd_container$plots$rank_determination_plot
itd_container %<>% run_tucker_ica(ranks=c(5,10),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                     show_donor_ids = TRUE,
                                     add_meta_associations="pval")

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

CTRL vs PD

meta <- meta.all %>% 
  filter(condition != "MSA") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "MSA")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=1,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var',
                               vargenes_thresh=500,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 9 donors. All donors have at least 1 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1084
itd_container %<>% run_tucker_ica(ranks=c(5,10),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

PD vs MSA

meta <- meta.all %>% 
  filter(condition != "CTRL") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "CTRL")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=1,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var', 
                               vargenes_thresh=500,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 7 donors. All donors have at least 1 cells in each cell type included.
## Consider using fewer cell types or reducing the donor_min_cells parameter to include more donors.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1084
itd_container %<>% run_tucker_ica(ranks=c(5,10),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

Astrocytes, preparation

con.tmp <- qread("con_astrocytes.qs", nthreads = 10)
anno <- qread("anno_astro.qs")

cm.raw.all <- con.tmp$getJointCountMatrix(raw = T) %>%
  .[, !grepl("MT-|RPS|RPL", colnames(.))] %>% 
  Matrix::t()

meta.all <- con.tmp$getDatasetPerCell() %>% 
  .[names(.) %in% names(anno)] %>% 
  data.frame(donors = .) %>% 
  mutate(ctypes = anno[rownames(.)] %>% unname()) %>% 
  .[complete.cases(.), ]

metadata.all <- read.delim("metadata.tsv")

for (cc in colnames(metadata.all)[-1]) {
  meta.all[[cc]] <- metadata.all[[cc]][match(meta.all$donors, metadata.all$sample)]
}

# Convert integers to numeric
meta.all$age %<>% as.numeric()
meta.all$disease_duration %<>% as.numeric()

CTRL vs MSA

meta <- meta.all %>% 
  filter(condition != "PD") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "PD")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw %>% .[, colnames(.) %in% rownames(meta)], 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw %>% .[, colnames(.) %in% :
## Cell type names cannot have special characters '.', '_', or ':'. Removing these
## characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=0.05,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 17 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 704
itd_container %<>% run_tucker_ica(ranks=c(5,8),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

CTRL vs PD

meta <- meta.all %>% 
  filter(condition != "MSA") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "MSA")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=.05,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 22 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 871
itd_container %<>% run_tucker_ica(ranks=c(5,9),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), # Omitting brain bank
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), # Omitting brain bank
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

PD vs MSA

meta <- meta.all %>% 
  filter(condition != "CTRL") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "CTRL")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=.05,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 19 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 701
itd_container %<>% run_tucker_ica(ranks=c(6,7),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), 
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

Microglia, preparation

con.tmp <- qread("cao_micro_pvm.qs", nthreads = 10)$data.object
anno <- qread("anno_micro_pvm.qs")

cm.raw.all <- con.tmp$getJointCountMatrix(raw = T) %>% 
  Matrix::t() %>% 
  .[!grepl("MT-|RPS|RPL", rownames(.)), ]

meta.all <- con.tmp$getDatasetPerCell() %>% 
  data.frame(donors = .) %>% 
  mutate(ctypes = anno[rownames(.)] %>% unname()) %>% 
  .[complete.cases(.), ]

metadata.all <- read.delim("metadata.tsv")

for (cc in colnames(metadata.all)[-1]) {
  meta.all[[cc]] <- metadata.all[[cc]][match(meta.all$donors, metadata.all$sample)]
}

# Convert integers to numeric
meta.all$age %<>% as.numeric()
meta.all$disease_duration %<>% as.numeric()

CTRL vs MSA

meta <- meta.all %>% 
  filter(condition != "PD") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "PD")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw %>% .[, colnames(.) %in% rownames(meta)], 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)

itd_container %<>% form_tensor(donor_min_cells=3,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var', 
                               vargenes_thresh=500,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 10 donors. All donors have at least 3 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1114
itd_container %<>% run_tucker_ica(ranks=c(3,7),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

factors <- c('sex','pmi', "age", "flowcell", "extract")

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test = factors, 
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars= factors,
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

CTRL vs PD

meta <- meta.all %>% 
  filter(condition != "MSA") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "MSA")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)

itd_container %<>% form_tensor(donor_min_cells=3,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var', 
                               vargenes_thresh=500,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 18 donors. All donors have at least 3 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1012
itd_container %<>% run_tucker_ica(ranks=c(4,10),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

PD vs MSA

meta <- meta.all %>% 
  filter(condition != "CTRL") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "CTRL")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)

itd_container %<>% form_tensor(donor_min_cells=3,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var', 
                               vargenes_thresh=500,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 14 donors. All donors have at least 3 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1001
itd_container %<>% run_tucker_ica(ranks=c(4,10),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), 
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), 
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

Oligodendrocytes, preparation

con.tmp <- qread("con_oligodendrocytes.qs", nthreads = 10)
anno <- qread("anno_oligo.qs")

cm.raw.all <- con.tmp$getJointCountMatrix(raw = T) %>%
  .[rownames(.) %in% names(anno), !grepl("MT-|RPS|RPL", colnames(.))] %>% 
  Matrix::t()

meta.all <- con.tmp$getDatasetPerCell() %>% 
  data.frame(donors = .) %>% 
  mutate(ctypes = anno[rownames(.)] %>% unname()) %>% 
  .[complete.cases(.), ]

metadata.all <- read.delim("metadata.tsv")

for (cc in colnames(metadata.all)[-1]) {
  meta.all[[cc]] <- metadata.all[[cc]][match(meta.all$donors, metadata.all$sample)]
}

# Convert integers to numeric
meta.all$age %<>% as.numeric()
meta.all$disease_duration %<>% as.numeric()

CTRL vs MSA

meta <- meta.all %>% 
  filter(condition != "PD") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "PD")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw %>% .[, colnames(.) %in% rownames(meta)], 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw %>% .[, colnames(.) %in% :
## Cell type names cannot have special characters '.', '_', or ':'. Removing these
## characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=0.05,
                               scale_var = TRUE, 
                               var_scale_power = 1) # Diminishable effect at 0.5, but not on any other level
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 17 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1209
itd_container %<>% run_tucker_ica(ranks=c(6,9),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

CTRL vs PD

meta <- meta.all %>% 
  filter(condition != "MSA") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "MSA")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=.05,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 21 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1505
itd_container %<>% run_tucker_ica(ranks=c(4,8),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), 
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

PD vs MSA

meta <- meta.all %>% 
  filter(condition != "CTRL") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "CTRL")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=.05,
                               scale_var = TRUE, 
                               var_scale_power = 1) # Diminishable effect at 0.5, but not on any other level
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 18 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1146
itd_container %<>% run_tucker_ica(ranks=c(5,6),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), 
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), 
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

OPCs, preparation

con.tmp <- qread("con_opcs.qs", nthreads = 10)
anno <- qread("anno_opc.qs")

cm.raw.all <- con.tmp$getJointCountMatrix(raw = T) %>% 
  Matrix::t() %>% 
  .[!grepl("MT-|RPS|RPL", rownames(.)), ]

meta.all <- con.tmp$getDatasetPerCell() %>% 
  data.frame(donors = .) %>% 
  mutate(ctypes = anno[rownames(.)] %>% unname()) %>% 
  .[complete.cases(.), ]

metadata.all <- read.delim("metadata.tsv")

for (cc in colnames(metadata.all)[-1]) {
  meta.all[[cc]] <- metadata.all[[cc]][match(meta.all$donors, metadata.all$sample)]
}

# Convert integers to numeric
meta.all$age %<>% as.numeric()
meta.all$disease_duration %<>% as.numeric()

CTRL vs MSA

meta <- meta.all %>% 
  filter(condition != "PD") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "PD")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw %>% .[, colnames(.) %in% rownames(meta)], 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)

itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=0.2,
                               scale_var = TRUE, 
                               var_scale_power = 0.5) # Diminishable effect at 2, but not on any other level
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 17 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 778
itd_container %<>% run_tucker_ica(ranks=c(4,5),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

CTRL vs PD

meta <- meta.all %>% 
  filter(condition != "MSA") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "MSA")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)

itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=.4,
                               scale_var = TRUE, 
                               var_scale_power = 1) # No effect at 0.5, but 1, 1.5, 2
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 21 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 833
itd_container %<>% run_tucker_ica(ranks=c(4,6),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

PD vs MSA

meta <- meta.all %>% 
  filter(condition != "CTRL") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "CTRL")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)

itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=.4,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 18 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 617
itd_container %<>% run_tucker_ica(ranks=c(5,6),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

##              used   (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   18930383 1011.0   39451381  2107.0   39451381  2107.0
## Vcells 1088563262 8305.1 3171389962 24195.8 9054649287 69081.5

Extended Data Figure 6

For panels a-d, see scCODA.ipynb.

Load data

anno.major <- qread("anno_major.qs")
meta <- read.delim("metadata.tsv", sep = "\t")

anno.micro <- qread("anno_micro_pvm.qs") %>% 
  renameAnnotation("Steady-state","MIC_steady-state") %>% 
  renameAnnotation("Intermediate1","MIC_intermediate1") %>% 
  renameAnnotation("Intermediate2","MIC_intermediate2") %>% 
  renameAnnotation("Activated","MIC_activated")

anno.glia <- c("anno_astro.qs",
               "anno_oligo.qs",
               "anno_opc.qs") %>% 
  lapply(qread) %>% 
  Reduce(c, .) %>% 
  factor() %>% 
  renameAnnotation("Homeostatic_astrocytes", "AS_homeostatic") %>% 
  renameAnnotation("Reactive_astrocytes", "AS_reactive") %>% 
  renameAnnotation("Homeostatic_LINC01608", "OL_LINC01608") %>%
  renameAnnotation("Homeostatic_SLC5A11", "OL_SLC5A11") %>% 
  renameAnnotation("Reactive_SCGZ", "OL_SGCZ")

anno.neurons <- qread("anno_neurons.qs")

6e

# Minor final
anno.minor <- anno.major[!anno.major %in% c("PVMs", "Astrocytes", "Oligodendrocytes", "OPCs", "Microglia")] %>% 
  {factor(c(., anno.glia, anno.micro))} %>% 
  factor(levels = sort(levels(.)))

df <- data.frame(cid = names(anno.minor), anno = unname(anno.minor)) %>% 
  mutate(sample = strsplit(cid, "!!") %>% sget(1)) %>% 
  group_by(sample, anno) %>% 
  summarize(n = n()) %>% 
  mutate(prop = n / sum(n) * 100) %>% 
  ungroup() %>% 
  filter(anno %in% c("Inh. neurons", "Exc. neurons", "MSN")) %>% 
  mutate(age = meta$age[match(sample, meta$sample)]) %>% 
  mutate(age_bin = cut(age, breaks = c(0, 60, 70, 80, 90), labels = c("0-60", "60-70", "70-80", "80-90")),
         condition = grepl.replace(sample, c("CTRL", "MSA", "PD")))
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by sample and anno.
## ℹ Output is grouped by sample.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(sample, anno))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.
df %>% 
  ggplot(aes(age_bin, prop)) + 
  geom_boxplot() +
  geom_point(position = position_dodge(width = 0.85)) + 
  theme_bw() +
  facet_wrap(~ anno) -> p

p

Statistics

df %>% 
  group_by(anno) %>%
  rstatix::kruskal_test(prop ~ age_bin) %>% 
  mutate(padj = p.adjust(p, method = "BH"))
## # A tibble: 3 × 8
##   anno         .y.       n statistic    df     p method          padj
##   <fct>        <chr> <int>     <dbl> <int> <dbl> <chr>          <dbl>
## 1 Exc. neurons prop     15     0.788     3 0.852 Kruskal-Wallis 0.852
## 2 Inh. neurons prop     24     5.23      3 0.156 Kruskal-Wallis 0.376
## 3 MSN          prop     24     4.10      3 0.251 Kruskal-Wallis 0.376

6f

# Minor final
anno.minor <- anno.major[!anno.major %in% c("Inh. neurons", "MSN", "Exc. neurons", "PVMs", "Astrocytes", "Oligodendrocytes", "OPCs", "Microglia")] %>% 
  {factor(c(., anno.neurons, anno.glia, anno.micro))} %>% 
  factor(levels = sort(levels(.)))

df <- data.frame(cid = names(anno.minor), anno = unname(anno.minor)) %>% 
  mutate(sample = strsplit(cid, "!!") %>% sget(1)) %>% 
  group_by(sample, anno) %>% 
  summarize(n = n()) %>% 
  mutate(prop = n / sum(n) * 100) %>% 
  ungroup() %>% 
  filter(anno %in% levels(anno.micro)) %>% 
  mutate(age = meta$age[match(sample, meta$sample)]) %>% 
  mutate(age_bin = cut(age, breaks = c(0, 60, 70, 80, 90), labels = c("0-60", "60-70", "70-80", "80-90")),
         condition = grepl.replace(sample, c("CTRL", "MSA", "PD")))
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by sample and anno.
## ℹ Output is grouped by sample.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(sample, anno))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.
df %>% 
  ggplot(aes(age_bin, prop)) + 
  geom_boxplot() +
  geom_point(position = position_dodge(width = 0.85)) + 
  theme_bw() +
  facet_wrap(~ anno) -> p

p

Statistics

df %>% 
  group_by(anno) %>%
  rstatix::kruskal_test(prop ~ age_bin) %>% 
  mutate(padj = p.adjust(p, method = "BH"))
## # A tibble: 5 × 8
##   anno              .y.       n statistic    df      p method          padj
##   <fct>             <chr> <int>     <dbl> <int>  <dbl> <chr>          <dbl>
## 1 MIC_activated     prop     29      7.58     3 0.0556 Kruskal-Wallis 0.139
## 2 MIC_intermediate1 prop     29      4.58     3 0.206  Kruskal-Wallis 0.258
## 3 MIC_intermediate2 prop     29      8.94     3 0.0302 Kruskal-Wallis 0.139
## 4 MIC_steady-state  prop     29      2.78     3 0.427  Kruskal-Wallis 0.427
## 5 PVMs              prop     29      5.13     3 0.163  Kruskal-Wallis 0.258

Extended Data Figure 7

We’re calculating cosine similarities to annotated data from Garma et al.. The data are available here.

Prepare data

Here, we’re showing how we prepared the data. This will not be run again.

lp.raw <- loomR::connect("Interneurons_raw.loom", mode = "r+", skip.validate = T)

cm.raw <- lp.raw[["matrix"]][,] %>% 
  `dimnames<-`(list(
    lp.raw[["col_attrs/obs_names"]][], 
    lp.raw[["row_attrs/var_names"]][]
  ))

lp.raw$close()

lp.norm <- loomR::connect("Interneurons_processed.loom", mode = "r+", skip.validate = T)

anno <- setNames(
  lp.norm$col.attrs$`IN subclass`[],
  lp.norm$col.attrs$obs_names[]
) %>% 
  factor()

spc <- setNames(
  lp.norm$col.attrs$sample[],
  lp.norm$col.attrs$obs_names[]
) %>% 
  factor()

origin <- setNames(
  lp.norm$col.attrs$Origin[],
  lp.norm$col.attrs$obs_names[]
) %>% 
  factor()

area <- setNames(
  lp.norm$col.attrs$Region[],
  lp.norm$col.attrs$obs_names[]
) %>% 
  factor()

sex <- setNames(
  lp.norm$col.attrs$Sex[],
  lp.norm$col.attrs$obs_names[]
) %>% 
  factor()

umap <- `rownames<-`(lp.norm$col.attrs$X_umap[,] %>% t(),
                     lp.norm$col.attrs$obs_names[])

lp.norm$close()

Then, we need to prepare a Conos object of those data. This will not be run here.

preprocessed <- spc %>%
  split(names(.), .) %>%
  .[sapply(., length) > 50] %>%
  lapply(\(cid) cm.raw[cid, ]) %>%
  lapply(Matrix::t) %>%
  lapply(basicP2proc, get.largevis = F, get.tsne = F, make.geneknn = F, n.cores = 20)

con <- Conos$new(preprocessed, n.cores = 32)
con$embeddings$PCA$UMAP <- umap
con$embedding <- umap
con$clusters$leiden$groups <- anno
con$clusters$sex$groups <- sex
con$clusters$area$groups <- area
con$clusters$origin$groups <- origin
con$clusters$spc$groups <- spc

qsave(con, "con_garma.qs", nthreads = 10)

7a

# Our data
rydbirk.neurons.anno <- qread("anno_neurons.qs") %>% 
  factor(labels = paste("Rydbirk", levels(.), sep = "-"))

rydbirk.neurons <- qread("cao_neurons.qs", nthreads = 10)$data.object

rydbirk.neurons.pseudo <- rydbirk.neurons$getJointCountMatrix(raw = T) %>%
  sccore::collapseCellsByType(groups = rydbirk.neurons.anno, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

rydbirk.neurons.pseudo %<>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("-") %>% 
                                   sget(1) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(rydbirk.neurons.pseudo), "group")), 
                                 design = ~ 1) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)

# Garma data
con.garma <- qread("con_garma.qs", nthreads = 10)

garma.neurons.anno <- con.garma$clusters$anno$groups %>% 
  factor() %>% 
  factor(labels = paste("Garma", levels(.), sep = "-"))

garma.neurons.pseudo <- con.garma$getJointCountMatrix(raw = T) %>% 
  sccore::collapseCellsByType(groups = garma.neurons.anno, min.cell.count = 1) %>%
  t() %>%
  apply(2, as, "integer")

garma.neurons.pseudo %<>%
  DESeq2::DESeqDataSetFromMatrix(.,
                                 colnames(.) %>%
                                   strsplit("_") %>%
                                   sget(1) %>%
                                   data.frame() %>%
                                   `dimnames<-`(list(colnames(garma.neurons.pseudo), "group")),
                                 design = ~ 1) %>%
  DESeq2::estimateSizeFactors() %>%
  DESeq2::counts(normalized = T)

# We rotate Garma into Rydbirk sample PCA space
cm.rydbirk <- rydbirk.neurons.pseudo %>% 
  as.data.frame() %>%
  filter(rowSums(.) > 0)
cm.garma <- garma.neurons.pseudo %>% 
  as.data.frame() %>%
  filter(rowSums(.) > 0)

genesToKeep <- conos:::getOdGenesUniformly(append(con.garma$samples, rydbirk.neurons$samples), 50) %>%
  intersect(rownames(cm.rydbirk)) %>% 
  intersect(rownames(cm.garma))

pc.res <-
  cm.rydbirk[genesToKeep, ] %>%
  t() %>%
  prcomp(center = T,
         scale = T)

pc.tmp <- cm.garma[genesToKeep, ] %>%
  as.data.frame() %>%
  filter(rowSums(.) > 0) %>%
  t() %>%
  scale(pc.res$center, pc.res$scale) %*% pc.res$rotation
dat.plot <- rbind(pc.res$x, pc.tmp) %>%
  data.frame() %>% 
  mutate(id = rownames(.)) %>% 
  mutate(study = ifelse(grepl("Rydbirk", id), "Rydbirk", "Garma") %>% factor(),
         anno = strsplit(id, "-") %>% sget(2))

lsa::cosine(dat.plot %>% 
              select(-id, -study, -anno) %>%
              t()) %>%
  reshape2::melt() %>% 
  ggplot(aes(Var1, Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(low = "skyblue3", mid = "white", high = "deeppink3") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  labs(title = "Top 50 OD genes, Garma PCA rotation into Rydbirk PCA space")

7b

cm.garma.raw <- con.garma$getJointCountMatrix(raw = T)
cm.rydbirk.raw <- rydbirk.neurons$getJointCountMatrix(raw = T)

idx <- intersect(colnames(cm.garma.raw), colnames(cm.rydbirk.raw))

cm.garma.ext <- cm.garma.raw %>% 
  sccore:::extendMatrix(idx)
cm.rydbirk.ext <- cm.rydbirk.raw %>% 
  sccore:::extendMatrix(idx)

cm.comb <- rbind(cm.garma.ext, cm.rydbirk.ext)
anno.comb <- c(garma.neurons.anno %>% .[names(.) %in% rownames(cm.comb)], rydbirk.neurons.anno %>% .[names(.) %in% rownames(cm.comb)]) %>% 
  factor()

cm.comb %<>% .[rownames(.) %in% names(anno.comb), ]

unique(
  c("GAD1","GAD2","PPP1R1B","SLC17A7","CHAT","CALB2","DCC","ID2","MEIS2","ST18","NKX2-1","NR2F2","PAX6","PVALB","RXFP1","SST","VIP","RELN","SATB2","DRD1","DRD2","FOXP2", "LRP8", "VLDLR") %>% 
    c("CCK", "VIP", "CXCL14", "CHAT", "PTHLH", "MOXD1", "CHST9", "TAC3", "SST", "NPY", "PVALB", "GRIK3", "DACH1", "GAD1", "GAD2")
) %>% 
  sccore::dotPlot(cm.comb, anno.comb) + scale_color_gradient(low = "grey80", high = "firebrick")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   20307595  1084.6   39451381  2107.0   39451381  2107.0
## Vcells 1512121776 11536.6 3171389962 24195.8 9054649287 69081.5

Extended Data Figure 8

Prepare data

We’re calculating cosine similarities to annotated data from Kamath et al.. The data are available here.

cm <- Matrix::readMM("GSE178265_Homo_matrix.mtx") %>% 
  `dimnames<-`(list(
    read.delim("GSE178265_Homo_features.tsv", header = F)$V1,
    read.delim("GSE178265_Homo_bcd.tsv", header = F)$V1
  ))

metadata.raw <- read.delim("METADATA_PD.tsv", header = T)[-1, ]

cells.to.omit <- metadata.raw %>% 
  filter(donor_id %in% c("NIH1", "Rat1", "TreeShrew1")) %>% 
  pull(NAME)

meta.per.cell <- metadata.raw %>% 
  filter(!NAME %in% cells.to.omit)

meta.per.sample <- metadata.raw %>% 
  filter(!NAME %in% cells.to.omit) %>% 
  select(-NAME, -libname, -biosample_id) %>% 
  mutate(area = grepl.replace(organ__ontology_label, c("substantia nigra pars compacta", "caudate nucleus"), c("SN", "CN"))) %>% 
  mutate(donor_area = paste(donor_id, area, sep = "-")) %>% 
  .[match(unique(.$donor_area), .$donor_area), ]

cids.per.donor.area <- meta.per.cell %>% 
  mutate(area = grepl.replace(organ__ontology_label, c("substantia nigra pars compacta", "caudate nucleus"), c("SN", "CN"))) %>% 
  mutate(donor_area = paste(donor_id, area, sep = "-")) %$% 
  split(NAME, donor_area)

cm.list <- meta %>%
  sccore::plapply(\(mm) cm[, colnames(cm) %in% rownames(mm)], n.cores = 7)

cm.area.donor <- cm.list %>%
  lapply(\(area) sccore::plapply(cids.per.donor.area, \(cid) area[, colnames(area) %in% cid], n.cores = 22)) %>%
  lapply(\(cms) cms[sapply(cms, ncol) > 30]) %>%
  lapply(lapply, as, "CsparseMatrix") %>%
  lapply(lapply, basicP2proc, get.largevis = F, get.tsne = F, make.geneknn = F, n.cores = 20)
# Annotations
embeddings <- dir(pattern = "UMAP")

embeddings %<>% 
  lapply(\(type) read.delim(type, header = T, row.names = 1)) %>% 
  setNames(embeddings %>% strsplit("_") %>% sget(1)) %>% 
  lapply(dplyr::rename, subtype = Cell_Type)

con.list <- names(embeddings) %>% 
  lapply(\(nn) {
    con <- Conos$new(cm.area.donor[[nn]], n.cores = 32)
    con$embeddings <- list(UMAP = embeddings[[nn]][, -3])
    con$embedding <- con$embeddings[[1]]
    con$clusters <- list(leiden = list(groups = embeddings[[nn]] %>% {setNames(pull(., subtype), rownames(.))}))
    
    return(con)
  }) %>% 
  setNames(names(embeddings))
##              used    (Mb) gc trigger  (Mb)   max used    (Mb)
## Ncells   22781884  1216.7   39451381  2107   39451381  2107.0
## Vcells 6139172459 46838.2 8075595639 61612 9054649287 69081.5

Plot

# Our data
rydbirk.neurons.anno <- qread("anno_neurons.qs") %>% 
  factor(labels = paste("Rydbirk", levels(.), sep = "-"))

rydbirk.neurons <- qread("cao_neurons.qs", nthreads = 10)$data.object

rydbirk.neurons.pseudo <- rydbirk.neurons$getJointCountMatrix(raw = T) %>%
  sccore::collapseCellsByType(groups = rydbirk.neurons.anno, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

rydbirk.neurons.pseudo %<>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("-") %>% 
                                   sget(1) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(rydbirk.neurons.pseudo), "group")), 
                                 design = ~ 1) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)

# Kamath data
kamath.neurons.anno1 <- con.list$da$clusters$leiden$groups %>% 
  factor() %>% 
  factor(labels = paste("Kamath", levels(.), sep = "-"))

kamath.neurons.anno2 <- con.list$nonda$clusters$leiden$groups %>% 
  factor() %>% 
  factor(labels = paste("Kamath", levels(.), sep = "-"))

kamath.neurons.anno <- factor(c(kamath.neurons.anno1, kamath.neurons.anno2))


kamath.cm1 <- con.list$da$getJointCountMatrix(raw = T)
kamath.cm2 <- con.list$nonda$getJointCountMatrix(raw = T)

kamath.neurons.pseudo <- rbind(kamath.cm1, kamath.cm2) %>%
  sccore::collapseCellsByType(groups = kamath.neurons.anno, min.cell.count = 1) %>%
  t() %>%
  apply(2, as, "integer")

kamath.neurons.pseudo %<>%
  DESeq2::DESeqDataSetFromMatrix(.,
                                 colnames(.) %>%
                                   strsplit("_") %>%
                                   sget(1) %>%
                                   data.frame() %>%
                                   `dimnames<-`(list(colnames(kamath.neurons.pseudo), "group")),
                                 design = ~ 1) %>%
  DESeq2::estimateSizeFactors() %>%
  DESeq2::counts(normalized = T)

# We rotate Kamath into Rydbirk sample PCA space
cm.rydbirk <- rydbirk.neurons.pseudo %>% 
  as.data.frame() %>%
  filter(rowSums(.) > 0)
cm.kamath <- kamath.neurons.pseudo %>% 
  as.data.frame() %>%
  filter(rowSums(.) > 0)
genesToKeep <- conos:::getOdGenesUniformly(append(con.list$da$samples, rydbirk.neurons$samples) %>% append(con.list$nonda$samples), 100) %>% 
  intersect(rownames(cm.rydbirk)) %>% 
  intersect(rownames(cm.kamath))

pc.res <-
  cm.rydbirk[genesToKeep, ] %>%
  t() %>%
  prcomp(center = T,
         scale = T)

pc.tmp <- cm.kamath[genesToKeep, ] %>%
  as.data.frame() %>%
  filter(rowSums(.) > 0) %>%
  t() %>%
  scale(pc.res$center, pc.res$scale) %*% pc.res$rotation

# Plot
dat.plot <- rbind(pc.res$x, pc.tmp) %>%
  data.frame() %>% 
  mutate(id = rownames(.)) %>% 
  mutate(study = ifelse(grepl("Rydbirk", id), "Rydbirk", "Kamath") %>% factor(),
         anno = strsplit(id, "-") %>% sget(2))

lsa::cosine(dat.plot %>% 
              select(-id, -study, -anno) %>% 
              t()) %>%
  reshape2::melt() %>% 
  ggplot(aes(Var1, Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(low = "skyblue3", mid = "white", high = "deeppink3") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  labs(title = "Top 100 OD genes, Kamath PCA rotation into Rydbirk PCA space")

##              used    (Mb)  gc trigger    (Mb)    max used    (Mb)
## Ncells   21443376  1145.2    39451381  2107.0    39451381  2107.0
## Vcells 5948867367 45386.3 11629033719 88722.5 11286418961 86108.6

Extended Data Figure 9

Here, we use public data from Smajic et al.. We isolated neurons and used the available annotation (IPDCO_hg_midbrain_cell.tsv).

con.smajic <- qread("smajic_neurons.qs", nthreads = 10)
con.neurons <- qread("cao_neurons.qs", nthreads = 10)$data.object

# Get Smajic annotation
tmp <- data.frame(cid = rownames(con.smajic$embedding)) %>% 
  mutate(barcode = strsplit(cid, "_") %>% 
           sget(3))

anno <- data.table::fread("IPDCO_hg_midbrain_cell.tsv", header = T) %>% 
  mutate(barcode = strsplit(barcode, "_") %>% 
           sget(1)) %>% 
  mutate(cid = tmp$cid[match(barcode, tmp$barcode)]) %>% 
  filter(!is.na(cid)) %>% 
  pull(cell_ontology, cid) %>%
  .[!. %in% c("Astrocytes", "Endothelial cells", "Ependymal", "Microglia", "Oligodendrocytes", "OPCs", "Pericytes")] %>% 
  factor()

cowplot::plot_grid(plotlist = list(
  con.smajic$plotGraph(groups = anno, title = "Smajic neurons, annotation", font.size = 3, shuffle.colors = T, plot.na = F) + theme(line = element_blank()),
  con.neurons$plotGraph(groups = anno.neurons, title = "Rydbirk neurons, annotation", font.size = 3) + theme(line = element_blank()),
  con.smajic$plotGraph(groups = anno, gene = "RELN", title = "RELN expression", plot.na = F) + theme(line = element_blank()),
  con.neurons$plotGraph(gene = "RELN", title = "RELN expression", plot.na = F) + theme(line = element_blank()),
  con.smajic$plotGraph(groups = anno, gene = "CADPS2", title = "CADPS2 expression", plot.na = F) + theme(line = element_blank()),
  con.neurons$plotGraph(gene = "CADPS2", title = "CADPS2 expression", plot.na = F) + theme(line = element_blank())), 
  nrow = 3)

##              used    (Mb)  gc trigger    (Mb)    max used    (Mb)
## Ncells   21578074  1152.4    39451381  2107.0    39451381  2107.0
## Vcells 5949818482 45393.6 11629033719 88722.5 11286418961 86108.6

Extended Data Figure 10

These plots were created with results from LIANA in Python.

Extended Data Figure 11

cm.merged <- con$getJointCountMatrix(raw = T) %>% 
  Matrix::t() %>% 
  .[, colnames(.) %in% names(anno.major)]

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.major %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(), 
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo %<>% 
  {. + 1} %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_") %>% 
                                   sget(1) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
c("TSPO", "COQ2", "MAPT", "FBXO47", "ELOVL7", "EDN1", "GAB1", "TENM2", "RABGEF1", "PLA2G4C", "INPP4B", "ZIC1", "ZIC2", "ZIC3", "ZIC4", "SNCA", "TPPP", "TPPP") %>% 
  {Map(\(gene, leg) plotGenePseudoBulk(gene, cm.pseudo, leg), gene = ., leg = c(rep(F, 17), T))} %>% 
  cowplot::plot_grid(plotlist = ., ncol = 3) -> p

p

Extended Data Figures 12 & 13

Load and prepare data

cao <- qread("cao_micro_pvm.qs", nthreads = 10)
cao$plot.theme <- theme_bw()

anno.sort <- anno.micro[rownames(cao$data.object$embedding)]
anno.sel <- anno.sort[!anno.sort %in% c("MIC_intermediate1", "PVMs")] %>% factor()

emb <- cao$data.object$embedding %>% 
  `colnames<-`(c("UMAP1","UMAP2")) %>% 
  .[rownames(.) %in% names(anno.sel), ]

sds_obj <- slingshot(emb, 
                     anno.sel, 
                     start.clus = "MIC_steady-state",
                     stretch = 0
)

sds <- as.SlingshotDataSet(sds_obj)

pseudotime <- sds_obj@assays@data@listData$pseudotime[, 1]

12a

plot.df <- cao$data.object$embedding %>% 
  as.data.frame() %>% 
  .[rownames(.) %in% names(sds_obj), ] %>% 
  `colnames<-`(c("UMAP1","UMAP2")) %>% 
  mutate(., annotation = anno.micro[rownames(.)] %>% as.factor()) %>% 
  .[complete.cases(.),]

ldata <- getTscanTrajectory(cao$data.object, anno.sel)

cao$data.object$embedding %>% 
  as.data.frame() %>% 
  mutate(., unname(anno.micro[rownames(.)])) %>% 
  setNames(c("UMAP1", "UMAP2", "annotation")) %>% 
  mutate(., pseudotime = pseudotime[rownames(.)]) %>%
  ggplot() +
  geom_point(aes(UMAP1, UMAP2, col = pseudotime), size = 0.3) +
  geom_line(data = ldata1, aes(UMAP1, UMAP2, group = edge), size = 1) +
  theme_bw() +
  theme(line = element_blank()) +
  scale_color_gradient(low = "navyblue", high = "orange") +
  labs(col = "Pseudotime", x = "UMAP1", y = "UMAP2")# +
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

12b

Here, we show the code for running the generalized additive mixed model. It takes a significant amount of time to run, we provide data in pseudotime_micro_l1.qs.

mat <- cao$data.object$getJointCountMatrix() %>%
  .[match(names(sds_obj), rownames(.)), colSums(.) > 2e2] %>% 
  .[, !grepl(pattern = "RPL|RPS|MT-", colnames(.))]

mt <- mat %>%
  as.matrix() %>%
  as.data.frame() %>%
  tibble::rownames_to_column() %>%
  {message("Mutating"); mutate(.,
                               pseudotime = pseudotime[rowname],
                               condition = cao$data.object$getDatasetPerCell()[rowname] %>%
                                 as.character() %>%
                                 strsplit("_") %>%
                                 sget(1),
                               sample = strsplit(rowname, "!!") %>%
                                 sget(1))} %>%
  .[complete.cases(.), ] %>%
  {message("Melting"); reshape2::melt(., id.vars = c("rowname", "pseudotime", "condition", "sample"))} %$%
  {message("Splitting"); split(., variable)}
res <- mt %>% 
  sccore::plapply(\(x) {
    fit_full <- gamm4(data = x, formula = value ~ s(pseudotime, by = factor(condition)), random = ~(1 | sample), REML = F)
    fit_reduced <- gamm4(data = x, formula = value ~ s(pseudotime), random = ~(1 | sample), REML = F)
    fit_none <- gamm4(data = x, formula = value ~ 1, random = ~(1 | sample), REML = F)
    
    ann <- anova(fit_full$mer, fit_reduced$mer, fit_none$mer)
    
    if (any(ann$`Pr(>Chisq)` <= 0.05, na.rm = T)) {
      residuals <- predict(fit_full$gam, se.fit = T)$fit %>% 
        unname()
      
      r.sq <- c(summary(fit_full$gam)$r.sq, summary(fit_reduced$gam)$r.sq) %>% 
        setNames(c("full", "reduced"))
      
      out <- list(anova = ann,
                  residuals = residuals,
                  r.sq = r.sq)
      
      return(out)
    }
  }, n.cores = 40, mc.preschedule = T, mc.cleanup = T, progress = F) %>%
  .[!sapply(., is.null)]

qsave(res, "pseudotime_micro_l1.qs")
res <- qread("pseudotime_micro_l1.qs")

rsq.pseudo <- res %>% 
  sapply(\(gene) gene$r.sq[1]) %>% 
  sort(decreasing = T)

res.filter <- res[names(rsq.pseudo)[seq(50)] %>% strsplit(".", fixed = T) %>% sget(1)]

cm.merged <- cao$data.object$getJointCountMatrix() %>% 
  .[match(names(pseudotime), rownames(.)), colnames(.) %in% names(res.filter)] %>% 
  .[rowSums(.) > 0, ]

## Predict smoothend expression 
pseudotime.both <- pseudotime[rownames(cm.merged)]
weights.both <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.both), rownames(.)), 1] + 1E-7

scFit <- cm.merged %>% 
  Matrix::t() %>% 
  tradeSeq::fitGAM(pseudotime = pseudotime.both, cellWeights = weights.both, verbose = T)
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
Smooth <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged), tidy = F, n=100)

# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))

# Seriate the results
Smooth <- Smooth[ seriation::get_order(seriation::seriate(Smooth, method="PCA_angle")), ]

# Create heatmap
col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))

Heatmap(Smooth, 
        col = col_fun,
        cluster_columns=F, 
        cluster_rows=F, 
        show_column_names = F, 
        row_names_gp = grid::gpar(fontsize = 5))

13

cpc <- pseudotime %>% 
  names() %>% 
  strsplit("_") %>% 
  sget(1) %>% 
  factor()

res.filter %>% 
  lget("residuals") %>% 
  lapply(as.data.frame) %>% 
  lapply(setNames, "residuals") %>% 
  lapply(mutate, group = cpc, pseudotime = pseudotime) %>% 
  data.table::rbindlist(idcol = "gene") %>% 
  ggplot(aes(pseudotime, residuals, col = group)) +
  geom_smooth() +
  theme_bw() +
  facet_wrap(~gene, ncol = 5, scales = "free")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Extended Data Figure 15

Here, we use public data from Feleke et al.. We isolated microglia based on CSF1R expression and performed a quick annotation using AIF1 expression to identify activated microglia.

con.micro <- qread("feleke_micro.qs")

anno.micro <- getConosCluster(con.micro) %>% 
  factor(labels = c("Cluster1", "Activated", "Cluster2", "Cluster2"))

# We set sample groups manually as it can't be inferred from sample names
sample.groups <- c("CTRL", "CTRL", "DLB", "DLB", "DLB", "DLB", "DLB", "PDD", "PDD", "PD", "PDD", "PD", "PDD", "PD", "PDD", "PDD", "DLB", "PD", "PDD", "PD", "DLB", "PD", "PD", "CTRL", "CTRL", "CTRL", "CTRL", "CTRL") %>% 
  setNames(names(con.micro$samples))
con.clone <- con.micro$clone()
con.clone$samples <- con.micro$samples %>% .[which(sample.groups %in% c("CTRL", "PD"))]

sample.groups.clone <- sample.groups %>% .[. %in% c("CTRL", "PD")]
cao <- Cacoa$new(con.clone, sample.groups = sample.groups.clone, ref.level = "CTRL", target.level = "PD", cell.groups = anno.micro, n.cores = 32)

p <- cao$plotCellGroupSizes() + theme(line = element_blank())
p

cao$estimateCellLoadings()
p <- cao$plotCellLoadings()
## Warning: Removed 651 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 651 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).
p

p <- con.clone$plotGraph(size = 0.3, groups = anno.micro, font.size = 5) + theme(line = element_blank())
p

p <- con.clone$plotGraph(gene = "AIF1", title = "AIF1", plot.na = F) + theme(line = element_blank())
p

##              used    (Mb)  gc trigger    (Mb)    max used    (Mb)
## Ncells   21546244  1150.7    39451381  2107.0    39451381  2107.0
## Vcells 5960924261 45478.3 11629033719 88722.5 11435508188 87246.1

Extended Data Figure 16

16a

We provide the results output from scHLAcount in scHLAcount.zip.

samples <- dir("scHLAcount/rydbirk/") %>% .[grepl(pattern = "_", .)]

cms <- lapply(samples, function(x) {
  labels <- read.delim(paste0("scHLAcount/rydbirk/",x,"/labels.tsv"), header = F) %>% .[,1]
  barcodes <- read.delim(paste0("scHLAcount/rydbirk/",x,"/barcodes.tsv"), header = F) %>% 
    .[,1] %>%
    sapply(function(y) paste0(x,"one_",y))
  
  mtx <- Matrix::readMM(paste0("scHLAcount/rydbirk/",x,"/count_matrix.mtx")) %>% 
    t() %>%
    `dimnames<-`(list(barcodes, labels)) %>%
    .[rowSums(.) > 0,] # Remove cells with no counts
  
  tmp <- colnames(mtx) %>%
    strsplit("*", T) %>%
    sapply(`[[`, 1)
  
  # If more than one allele per HLA type has been detected, sum per HLA type
  if(any(tmp %>% table() %>% as.numeric() > 1)) {
    cs <- colSums(mtx)
    alleles <- unique(tmp)
    
    mtx <- lapply(alleles, function(y) mtx[,tmp == y]) %>%
      lapply(function(y) {
        if(class(y) == "dgTMatrix") rowSums(y) else y
      }) %>% 
      unlist() %>% 
      Matrix(nrow = nrow(mtx)) %>%
      `rownames<-`(rownames(mtx))
  }
  
  colnames(mtx) <- unique(tmp)
  return(mtx)
}) %>%
  setNames(samples)

Calculations

# Cell-wise
cell.counts <- sapply(cms, rowSums) %>% 
  unname() %>% 
  unlist() %>% 
  `names<-`(., gsub("one_","!!",names(.)))

depth <- getConosDepth(con) %>%
  .[match(names(cell.counts), names(.))] %>%
  {data.frame(cell = names(.),
              sample = names(.) %>% strsplit("!!", T) %>% sapply(`[[`, 1),
              depth = unname(.))}

depth[is.na(depth)] <- 0

cell.group <- grepl.replace(names(cell.counts), patterns = c("CTRL","PD","MSA")) %>% as.factor()

mhc1.raw <- sapply(cms, function(x) {
  tmp <- x[,colnames(x) %in% c("A","B","C")]
  if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>% 
  unname() %>% 
  unlist() %>% 
  `names<-`(., gsub("one_","!!",names(.)))

mhc2.raw <- sapply(cms, function(x) {
  tmp <- x[,!colnames(x) %in% c("A","B","C")]
  if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>% 
  unname() %>% 
  unlist() %>% 
  `names<-`(., gsub("one_","!!",names(.)))

cell.df <- data.frame(group = cell.group,
                      counts = cell.counts,
                      mhc1 = mhc1.raw,
                      mhc2 = mhc2.raw,
                      depth = depth$depth) %>%
  mutate(., anno = anno.major[match(rownames(.), names(anno.major))],
         sample = rownames(.) %>% sapply(strsplit, "!!", T) %>% sapply(`[[`, 1)) %>%
  mutate(type = grepl.replace(anno %>% as.character(), levels(anno), c("Brain","Brain","Brain","Peripheral","Peripheral","Peripheral","Brain","Brain","Brain","Brain"))) %>%
  `rownames<-`(names(cell.counts)) %>%
  filter(!is.na(anno)) 

Load Smajic data

samples <- dir("scHLAcount/smajic") %>% .[grepl(pattern = "SRR", .)]

cms <- lapply(samples, function(x) {
  labels <- read.delim(paste0("scHLAcount/smajic/",x,"/labels.tsv"), header = F) %>% .[,1]
  barcodes <- read.delim(paste0("scHLAcount/smajic/",x,"/barcodes.tsv"), header = F) %>% 
    .[,1] %>%
    sapply(function(y) paste0(x,"_",y,"_1"))
  mtx <- Matrix::readMM(paste0("scHLAcount/smajic/",x,"/count_matrix.mtx")) %>% 
    t() %>%
    `dimnames<-`(list(barcodes, labels)) %>%
    .[rowSums(.) > 0,] # Remove cells with no counts
  
  tmp <- colnames(mtx) %>%
    strsplit("*", T) %>%
    sapply(`[[`, 1)
  
  # If more than one allele per HLA type has been detected, sum per HLA type
  if(any(tmp %>% table() %>% as.numeric() > 1)) {
    cs <- colSums(mtx)
    alleles <- unique(tmp)
    
    mtx <- lapply(alleles, function(y) mtx[,tmp == y]) %>%
      lapply(function(y) {
        if(class(y) == "dgTMatrix") rowSums(y) else y
      }) %>% 
      unlist() %>% 
      Matrix(nrow = nrow(mtx)) %>%
      `rownames<-`(rownames(mtx))
  }
  
  colnames(mtx) <- unique(tmp)
  return(mtx)
}) %>%
  setNames(samples)

# Correct naming
name.df <- data.frame(samples = samples, ids = c(rep("PD",6) %>% mapply(function(x,y) paste0(x,y), x = ., y = c(1,1,2:5)),
                                                 rep("CTRL",6) %>% mapply(function(x,y) paste0(x,y), x = ., y = 1:6)))

anno <- readRDS(paste0("scHLAcount/smajic/anno.sma.rds")) %>%
  setNames(., names(.) %>% 
             strsplit("_", T) %>% 
             lapply(function(x) {
               if(grepl("C", x[[1]])) x[[1]] %<>% gsub("C","CTRL", .)
               return(x)
             }) %>%
             sapply(function(x) paste0(x[[1]],"_",x[[2]])))

cms <- 1:12 %>% 
  lapply(function(x) {
    rnames <- cms[[x]] %>%
      rownames() %>%
      names() %>%
      sapply(function(y) paste0(name.df[x,2],"_",y)) %>%
      unname()
    
    rownames(cms[[x]]) <- rnames
    return(cms[[x]])
  }) %>%
  setNames(names(cms))

Calculations

cell.df.rydbirk <- cell.df

# Cell-wise
cell.counts <- sapply(cms, rowSums) %>% 
  unname() %>% 
  unlist()

rem <- cell.counts %>%
  names() %>%
  table() %>%
  .[. > 1] %>%
  names()

cell.counts %<>% .[!names(.) %in% rem]

cell.group <- grepl.replace(names(cell.counts), patterns = c("CTRL","PD")) %>% as.factor()

mhc1.raw <- sapply(cms, function(x) {
  tmp <- x[,colnames(x) %in% c("A","B","C")]
  if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>% 
  unname() %>% 
  unlist() %>%
  .[!names(.) %in% rem]

mhc2.raw <- sapply(cms, function(x) {
  tmp <- x[,!colnames(x) %in% c("A","B","C")]
  if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>% 
  unname() %>% 
  unlist() %>%
  .[!names(.) %in% rem]

cell.df <- data.frame(group = cell.group,
                      counts = cell.counts,
                      mhc1 = mhc1.raw,
                      mhc2 = mhc2.raw) %>%
  mutate(., anno = anno[match(rownames(.), names(anno))],
         sample = rownames(.) %>% sapply(strsplit, "_", T) %>% sapply(`[[`, 1)) %>%
  `rownames<-`(names(cell.counts)) %>%
  filter(!is.na(anno))

Integrate with our data and plot

cell.df.int <- rbind(cell.df.rydbirk %>% 
                       select(-c("depth","type")) %>%
                       mutate(origin = "Rydbirk"),
                     cell.df %>% 
                       mutate(anno = anno %>% factor(labels = c("Astrocytes","Exc. neurons","Inh. neurons","Endothelial","Eppendymal","Exc. neurons","Inh. neurons","Inh. neurons","Microglia","Oligodendrocytes","OPCs","Pericytes")),
                              origin = "Smajic")) %>%
  filter(! anno %in% c("Blood_immune","Endothelial","Eppendymal","Mural","Pericytes","Pericytes/endothelial","Immune")) %>%
  filter(!is.na(anno)) %>% 
  mutate(anno = anno %>% factor(),
         origin = origin %>% factor(),
         anno = anno %>% unname() %>% factor(),
         sample = sample %>% unname())

cell.anno.df <-
  cell.df.int %>%
  group_by(anno, sample, group, origin) %>%
  summarise(mhc1 = mean(mhc1),
            mhc2 = mean(mhc2),
            counts = mean(counts),
            cells = length(sample)) %>%
  as.data.frame() %>% 
  select(-cells, -counts, -sample)
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by anno, sample, group, and origin.
## ℹ Output is grouped by anno, sample, and group.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(anno, sample, group, origin))` for per-operation
##   grouping (`?dplyr::dplyr_by`) instead.
p.text1 <- data.frame(signif = c("n.s.", "n.s.", "n.s.", "n.s.", "n.s.", "0.040", "0.032", "n.s."), 
                      mhc1 = 4.5,
                      anno = levels(cell.anno.df$anno))

p.text2 <- data.frame(signif = c("0.00057"), 
                      mhc2 = 8.5,
                      anno = "             Microglia")

cowplot::plot_grid(plotlist = list(
  ggplot(cell.anno.df, 
         aes(anno, 
             mhc1)) +
    geom_boxplot(outlier.shape = NA, 
                 aes(fill = group)) +
    geom_point(position = position_jitterdodge(jitter.width = 0.2), 
               aes(fill = group, 
                   col = origin, 
                   shape = group),
               size = 2) +
    scale_color_manual(values = c("black",
                                  "grey50")) +
    labs(x = "", 
         y = "Mean counts per sample", 
         title = "MHC-I counts", 
         fill = "", 
         col = "", 
         shape = " ") +
    geom_text(data = p.text1, aes(label = signif)) +
    theme_bw() +
    scale_fill_manual(values = pal.major) +
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
          line = element_blank()),
  ggplot(cell.anno.df %>% filter(anno == "Microglia") %>% mutate(anno = "             Microglia"), aes(anno, mhc2)) +
    geom_boxplot(outlier.shape = NA, aes(fill = group)) +
    geom_point(position = position_jitterdodge(jitter.width = 0.2), aes(fill = group, col = origin, shape = group), size = 2) +
    scale_color_manual(values = c("black","grey50")) +
    labs(x = "", y = "", title = "MHC-II counts", fill = "", col = "", shape = " ") +
    geom_text(data = p.text2, aes(label = signif)) +
    theme_bw() +
    scale_fill_manual(values = pal.major) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
          line = element_blank())
), ncol = 2, rel_widths = c(2,0.9))

Kruskal-Wallis, asses differences per cell type

cell.anno.df %>% 
  group_by(anno) %>% 
  kruskal_test(mhc1 ~ group) %>% 
  data.frame()
##               anno  .y.  n statistic df      p         method
## 1       Astrocytes mhc1 38 5.4099627  2 0.0669 Kruskal-Wallis
## 2        Microglia mhc1 37 3.5900071  2 0.1660 Kruskal-Wallis
## 3 Oligodendrocytes mhc1 38 4.0883941  2 0.1290 Kruskal-Wallis
## 4             PVMs mhc1 24 3.5403947  2 0.1700 Kruskal-Wallis
## 5             OPCs mhc1 38 0.9689609  2 0.6160 Kruskal-Wallis
## 6     Inh. neurons mhc1 31 6.4335165  2 0.0401 Kruskal-Wallis
## 7     Exc. neurons mhc1 22 6.8860651  2 0.0320 Kruskal-Wallis
## 8              MSN mhc1 21 4.9345432  2 0.0848 Kruskal-Wallis
cell.anno.df %>% 
  filter(anno == "Microglia") %>% 
  group_by(anno) %>% 
  kruskal_test(mhc2 ~ group) %>% 
  data.frame()
##        anno  .y.  n statistic df        p         method
## 1 Microglia mhc2 37  14.92493  2 0.000574 Kruskal-Wallis

Wilcoxon, assess differences between our data and Smajic per condition per cell type

cell.anno.df %>% 
  filter(group != "MSA", 
         !anno %in% c("MSN","PVMs")) %>%
  group_by(anno, group) %>% 
  wilcox_test(mhc1 ~ origin) %>% 
  data.frame()
##                anno group  .y.  group1 group2 n1 n2 statistic     p
## 1        Astrocytes  CTRL mhc1 Rydbirk Smajic 10  6        27 0.792
## 2        Astrocytes    PD mhc1 Rydbirk Smajic 11  5        26 0.913
## 3         Microglia  CTRL mhc1 Rydbirk Smajic  9  6        21 0.529
## 4         Microglia    PD mhc1 Rydbirk Smajic 11  5        30 0.827
## 5  Oligodendrocytes  CTRL mhc1 Rydbirk Smajic 10  6        34 0.713
## 6  Oligodendrocytes    PD mhc1 Rydbirk Smajic 11  5        26 0.913
## 7              OPCs  CTRL mhc1 Rydbirk Smajic 10  6        15 0.118
## 8              OPCs    PD mhc1 Rydbirk Smajic 11  5        31 0.743
## 9      Inh. neurons  CTRL mhc1 Rydbirk Smajic  8  6        10 0.080
## 10     Inh. neurons    PD mhc1 Rydbirk Smajic  7  5        23 0.432
## 11     Exc. neurons  CTRL mhc1 Rydbirk Smajic  6  6        10 0.240
## 12     Exc. neurons    PD mhc1 Rydbirk Smajic  3  5         5 0.549
cell.anno.df %>% 
  filter(group != "MSA", 
         anno == "Microglia") %>%
  group_by(anno, group) %>% 
  wilcox_test(mhc2 ~ origin) %>% 
  data.frame()
##        anno group  .y.  group1 group2 n1 n2 statistic     p
## 1 Microglia  CTRL mhc2 Rydbirk Smajic  9  6        19 0.368
## 2 Microglia    PD mhc2 Rydbirk Smajic 11  5        40 0.180

16b

We provide a curated list with MHC and cytokine genes. Please note, the order of clusters may change.

dat <- read.table("MHC_cytokines_curated.tsv", header = T, sep = "\t")

cm.merged <- con$getJointCountMatrix(raw = T) %>% 
  Matrix::t() %>% 
  .[, colnames(.) %in% names(anno.major)] %>% 
  .[rownames(.) %in% (dat$name[dat$class == "cytokine"] %>% gsub("-","",.)), ] %>% 
  .[rowSums(.) > 0,]

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.major %>% 
  .[!is.na(.) & (. %in% c("Microglia", "PVMs"))] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(), 
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo %<>% 
  {. + 1} %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_") %>% 
                                   sget(1) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
cm.pseudo %<>% 
  Matrix::t() %>% 
  scale() %>%
  Matrix::t()

ord <- cm.pseudo %>% 
  colnames() %>% 
  {data.frame(id = .)} %>% 
  mutate(., 
         ct = strsplit(id, "!!") %>% 
           sget(2),
         condition = strsplit(id, "!!|_") %>% 
           sget(1)) %>% 
  arrange(ct, condition) %>% 
  pull(id)

cm.pseudo %<>% 
  .[, match(ord, colnames(.))]

cm.pseudo %<>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "gene") %>% 
  reshape2::melt(id.vars = c("gene")) %>% 
  mutate(variable = as.character(variable),
         condition = strsplit(variable, "!!|_") %>% 
           sget(1),
         ct = strsplit(variable, "!!") %>% 
           sget(2)) %>% 
  group_by(condition, ct, gene) %>% 
  summarize(m = mean(value)) %>% 
  ungroup() %>% 
  mutate(variable = paste(condition, ct, sep = "!!")) %>% 
  select(-condition, -ct) %>% 
  reshape2::dcast(gene ~ variable, value.var = "m") %>% 
  tibble::column_to_rownames(var = "gene")
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by condition, ct, and gene.
## ℹ Output is grouped by condition and ct.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(condition, ct, gene))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.
tmp <- cm.pseudo %>% 
  colnames() %>% 
  strsplit("_|!!")

clusters <- tmp %>%
  sget(2)

condition <- tmp %>% 
  sget(1)

ha <- ComplexHeatmap::HeatmapAnnotation(Annotation = clusters,
                                        Condition = condition,
                                        col = list(Annotation = c("Microglia" = unname(pal.major["Microglia"]), 
                                                                  "PVMs" = unname(pal.major["PVMs"])),
                                                   Condition = c("CTRL" = unname(pal.major["CTRL"]), 
                                                                 "MSA" = unname(pal.major["MSA"]),
                                                                 "PD" = unname(pal.major["PD"]))
                                        ))

ComplexHeatmap::Heatmap(cm.pseudo,
                        name = "Expression", 
                        show_column_names = F, 
                        show_row_names = T, 
                        cluster_columns = F, 
                        show_column_dend = F, 
                        cluster_rows = T, 
                        top_annotation = ha, 
                        show_row_dend = F, 
                        column_split = colnames(cm.pseudo) %>% strsplit("!!|_") %>% sget(2) %>% unname() %>% factor(),
                        col=circlize::colorRamp2(c(-1, 1.2), c("white", "firebrick")),
                        row_km = 4)
## Warning: The input is a data frame-like object, convert it to a matrix.

Extended Data Figure 17

17c

c("FOSL2", "TCF4", "STAT1", "NR3C1", "ETV7", "THRB", "BPTF", "RELB", "CEBPD", "HIF1A", "ELF1", "CREM", "ELF2", "ZNF44", "CHD1", "POU2F1", "GTF2IRD1", "NFIA", "NFE2L2", "FOXP1", "FOXO3") %>% 
  clusterProfiler::enrichGO("org.Hs.eg.db", "SYMBOL", "BP") %>% 
  enrichplot::pairwise_termsim() %>% 
  enrichplot::treeplot() +
  scale_fill_manual(values = brewer.pal(6, "Greys")[-1])
## 
## Warning: `aes_()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`
## ℹ The deprecated feature was likely used in the ggtree package.
##   Please report the issue at <https://github.com/YuLab-SMU/ggtree/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • as.Date = as.Date
## • yscale_mapping = yscale_mapping
## • show.legend = FALSE
## ℹ Did you misspell an argument name?
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

Extended Data Figure 18

18a

We provide the results from scDRS based on Nalls et al. GWAS summary stats. For calculation of these, see scDRS_PD.ipynb.

dat.scores <- qread("gwas/nalls/PD.full_score.qs")

# Load downstream analyses for cell types versus trait
dat.ct <- read.delim("gwas/nalls/downstream_celltype.tsv", header = F, sep = "\t") %$%  
  strsplit(V1, " ") %>% 
  unlist() %>% 
  gsub("\\", "", ., fixed = T) %>% 
  gsub("\n", "", ., fixed = T) %>% 
  .[!sapply(., \(x) x == "")] %>% 
  .[c(1:5,69:72, # Header
      8:12,75:78, # Astro
      15:19,81:84, # EN
      21:25,86:89, # Immune
      28:32,92:95, # IN
      34:38,97:100, # MSN
      40:44,102:105, # MIC
      46:50,107:110, # OPCs
      52:56,112:115, # OL
      58:62,117:120, # PVMs
      64:68,122:125 # Peri
  )]

dat.sign <- dat.ct %>%
  split(seq(9)) %>% 
  bind_rows() %>% 
  {`colnames<-`(.[-1, ], .[1, ])} %>% 
  as.data.frame()

dat.sign %<>%  mutate(group = c("Astrocytes",
                                "Exc. neurons",
                                "Immune",
                                "Inh. neurons",
                                "MSN",
                                "Microglia",
                                "OPCs",
                                "Oligodendrocytes",
                                "PVMs",
                                "Pericytes/endothelial"))

dat.sign %<>% 
  filter(!group == "Unknown") %>% 
  mutate(assoc_sign = as.numeric(assoc_mcp) %>% sapply(\(x) if (x <= 0.05) paste("*", formatC(x, digits = 2), sep = " ") else ""),
         hetero_sign = as.numeric(hetero_mcp) %>% sapply(\(x) if (x <= 0.05) paste("#", formatC(x, digits = 2), sep = " ")  else " ")) %>% 
  mutate(sign = paste0(assoc_sign,"\n",hetero_sign) %>% gsub("\n ", "", .)) %>%
  dplyr::rename(type = group)

p <- data.frame(cell = dat.scores$X,
                score = dat.scores$norm_score,
                type = anno.major[match(dat.scores$X,
                                        names(anno.major))]) %>%
  group_by(type) %>%
  summarize(m = median(score)) %>%
  filter(!is.na(type)) %>% 
  mutate(type = as.character(type)) %$% 
  arrange(., desc(m)) %>%
  mutate(type = factor(type, levels = type), sign = dat.sign$sign[match(type %>% levels, dat.sign$type)]) %>%
  ggplot(aes(type, m, fill = type)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = sign), vjust = -0.5) +
  theme_bw() + 
  labs(y = "Mean score", x = "", title = "scDRS mean scores and associations") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        line = element_blank()) +
  ylim(c(-0.6, 2.6)) +
  scale_fill_manual(values = pal.major) +
  geom_hline(yintercept = 0)

p

18b

dat.ct <- read.delim("gwas/nalls/downstream_celltype_microglia.tsv", header = F, sep = "\t") %$%  
  strsplit(V1, " ") %>% 
  unlist() %>% 
  gsub("\\", "", ., fixed = T) %>% 
  gsub("\n", "", ., fixed = T) %>% 
  gsub("group", "", .) %>% 
  .[!sapply(., \(x) x == "")] %>% 
  {c("group", .[seq(35)])} %>% 
  matrix(ncol = 6, byrow = T) %>% 
  as.data.frame() %>% 
  `colnames<-`(., .[1, ]) %>% 
  .[-1, ]

anno.micro <- qread("anno_micro_pvm.qs") %>% 
  renameAnnotation("Steady-state","MIC_steady-state") %>% 
  renameAnnotation("Intermediate1","MIC_intermediate1") %>% 
  renameAnnotation("Intermediate2","MIC_intermediate2") %>% 
  renameAnnotation("Activated","MIC_activated") %>% 
  .[!. == "PVMs"] %>% 
  factor()

dat.sign <- 
  dat.ct %>% 
  filter(!group == "Unknown") %>% 
  mutate(assoc_sign = as.numeric(assoc_mcp) %>% sapply(\(x) if (x <= 0.05) paste("*", formatC(x, digits = 2), sep = " ") else ""),
         hetero_sign = as.numeric(hetero_mcp) %>% sapply(\(x) if (x <= 0.05) paste("#", formatC(x, digits = 2), sep = " ")  else " ")) %>% 
  mutate(sign = paste0(assoc_sign,"\n",hetero_sign) %>% gsub("\n ", "", .)) %>%
  dplyr::rename(type = group)

p <- data.frame(cell = dat.scores$X %>% gsub("one_", "!!", .),
                score = dat.scores$norm_score,
                type = anno.major[match(dat.scores$X,
                                        names(anno.major))]) %>%
  filter(cell %in% names(anno.micro)) %>% 
  mutate(type = factor(anno.micro[.$cell])) %>% 
  group_by(type) %>%
  summarize(m = mean(score)) %>%
  filter(!is.na(type)) %>% 
  mutate(type = as.character(type)) %>% 
  arrange(desc(m)) %>% 
  mutate(type = factor(type, levels = type), sign = dat.sign$sign[match(type %>% levels, dat.sign$type)]) %>%
  mutate(type = factor(type, levels = type)) %>% 
  ggplot(aes(type, m, fill = type)) + 
  geom_bar(stat = "identity") +
  geom_text(aes(label = sign), vjust = -0.5, nudge_y = -0.05) +
  theme_bw() + 
  theme(line = element_blank()) +
  labs(y = "Mean score", x = "", title = "scDRS for microglia") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  ylim(c(0, 2)) +
  scale_fill_manual(values = pal.major)

p

18c

We provide the results from scDRS based on Chia et al. GWAS summary stats. For calculation of these, see scDRS_MSA.ipynb.

dat.scores <- qread("gwas/chia/MSA.full_score.qs")

# Load downstream analyses for cell types versus trait
dat.ct <- read.delim("gwas/chia/downstream_celltype.tsv", header = F, sep = "\t") %$%  
  strsplit(V1, " ") %>% 
  unlist() %>% 
  gsub("\\", "", ., fixed = T) %>% 
  gsub("\n", "", ., fixed = T) %>% 
  .[!sapply(., \(x) x == "")] %>% 
  .[c(1:5,69:72, # Header
      8:12,75:78, # Astro
      15:19,81:84, # EN
      21:25,86:89, # Immune
      28:32,92:95, # IN
      34:38,97:100, # MSN
      40:44,102:105, # MIC
      46:50,107:110, # OPCs
      52:56,112:115, # OL
      58:62,117:120, # PVMs
      64:68,122:125 # Peri
  )]

dat.sign <- dat.ct %>%
  split(seq(9)) %>% 
  bind_rows() %>% 
  {`colnames<-`(.[-1, ], .[1, ])} %>% 
  as.data.frame()

dat.sign %<>%  mutate(group = c("Astrocytes",
                                "Exc. neurons",
                                "Immune",
                                "Inh. neurons",
                                "MSN",
                                "Microglia",
                                "OPCs",
                                "Oligodendrocytes",
                                "PVMs",
                                "Pericytes/endothelial"))

dat.sign %<>% 
  filter(!group == "Unknown") %>% 
  mutate(assoc_sign = as.numeric(assoc_mcp) %>% sapply(\(x) if (x <= 0.05) paste("*", formatC(x, digits = 2), sep = " ") else ""),
         hetero_sign = as.numeric(hetero_mcp) %>% sapply(\(x) if (x <= 0.05) paste("#", formatC(x, digits = 2), sep = " ")  else " ")) %>% 
  mutate(sign = paste0(assoc_sign,"\n",hetero_sign) %>% gsub("\n ", "", .)) %>%
  dplyr::rename(type = group)

p <- data.frame(cell = dat.scores$X,
                score = dat.scores$norm_score,
                type = anno.major[match(dat.scores$X,
                                        names(anno.major))]) %>%
  group_by(type) %>%
  summarize(m = median(score)) %>%
  filter(!is.na(type)) %>% 
  mutate(type = as.character(type)) %$% 
  arrange(., desc(m)) %>%
  mutate(type = factor(type, levels = type), sign = dat.sign$sign[match(type %>% levels, dat.sign$type)]) %>%
  ggplot(aes(type, m, fill = type)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = sign), vjust = .5) +
  theme_bw() + 
  labs(y = "Mean score", x = "", title = "scDRS mean scores and associations") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        line = element_blank()) +
  ylim(c(-0.5, 0.5)) +
  scale_fill_manual(values = pal.major) +
  geom_hline(yintercept = 0)

p

Extended Data Figure 19

All subfigures were made in GraphPad Prism. Data are available in Phagocytosis_assay_triplicates_raw_data_HH.tsv (Copenhagen experiment) or HOUSTON.tsv (Houston experiment).

Extended Data Table 2

Not rerun

anno.major <- qread("anno_major.qs")

anno.micro <- qread("anno_micro_pvm.qs") %>% 
  renameAnnotation("Steady-state","MIC_steady-state") %>% 
  renameAnnotation("Intermediate1","MIC_intermediate1") %>% 
  renameAnnotation("Intermediate2","MIC_intermediate2") %>% 
  renameAnnotation("Activated","MIC_activated")

anno.glia <- c("anno_astro.qs",
               "anno_oligo.qs",
               "anno_opc.qs") %>% 
  lapply(qread) %>% 
  Reduce(c, .) %>% 
  factor() %>% 
  renameAnnotation("Homeostatic_astrocytes", "AS_homeostatic") %>% 
  renameAnnotation("Reactive_astrocytes", "AS_reactive") %>% 
  renameAnnotation("Homeostatic_LINC01608", "OL_LINC01608") %>%
  renameAnnotation("Homeostatic_SLC5A11", "OL_SLC5A11") %>% 
  renameAnnotation("Reactive_SCGZ", "OL_SGCZ")

anno.neurons <- qread("anno_neurons.qs")

# Minor final
anno.minor <- anno.major[!anno.major %in% c("Inh. neurons", "Exc. neurons", "PVMs", "Astrocytes", "Oligodendrocytes", "OPCs", "Microglia")] %>% 
  {factor(c(., anno.neurons, anno.glia, anno.micro))} %>% 
  factor(levels = sort(levels(.)))

# Major final
anno.major %<>% .[!names(.) %in% names(anno.neurons)] %>% 
  {factor(c(., anno.neurons))} %>% 
  collapseAnnotation("MSN") %>% 
  collapseAnnotation("GABAergic") %>% 
  collapseAnnotation("GLUergic") %>% 
  renameAnnotation("MSN", "Medium spiny neurons") %>% 
  renameAnnotation("GABAergic", "GABAergic neurons") %>% 
  renameAnnotation("GLUergic", "GLUergic neurons") %>% 
  factor(levels = sort(levels(.)))
con.major$n.cores <- 32 # Major object
markers.major <- con.major$getDifferentialGenes(groups = anno.major, 
                                                z.threshold = 1, 
                                                upregulated.only = T, 
                                                append.specificity.metrics = T, 
                                                append.auc = T)

markers.major %>% 
  lapply(filter, PAdj <= 0.05) %>% 
  bind_rows(.id = "Celltype") %>% 
  write.table("Table SX - Celltype markers, major annotation.tsv", sep = "\t", dec = ".", row.names = F)

Extended Data Table 4

Not rerun

res <- list(
  Major = list(CTRLvsMSA = "cao_major_msa.qs",
               CTRLvsPD = "cao_major_pd.qs",
               PDvsMSA = "cao_major_dis.qs"),
  Neurons = list(CTRLvsMSA = "cao_neurons_msa.qs",
                 CTRLvsPD = "cao_neurons_pd.qs",
                 PDvsMSA = "cao_neurons_dis.qs"),
  Glial = list(CTRLvsMSA = "cao_oligo_astro_opc_msa.qs",
               CTRLvsPD = "cao_oligo_astro_opc_pd.qs",
               PDvsMSA = "cao_oligo_astro_opc_dis.qs"),
  Micro_PVM = list(CTRLvsMSA = "cao_micro_pvm_msa.qs",
                   CTRLvsPD = "cao_micro_pvm_pd.qs",
                   PDvsMSA = "cao_micro_pvm_dis.qs")) %>% 
  lapply(\(celltype) sccore::plapply(celltype, \(comp) qread(comp, nthreads = 10)$test.results$coda, n.cores = 3)) %>% 
  lapply(lapply, \(x) data.frame(subtype = names(x$padj),
                                 loadings.min = apply(x$loadings, 1, min),
                                 loadings.median = apply(x$loadings, 1, median),
                                 loadings.max = apply(x$loadings, 1, max),
                                 padj = unname(x$padj))) %>% 
  lapply(bind_rows, .id = "Comparison") %>% 
  bind_rows(.id = "Cell type") %>% 
  `rownames<-`(NULL)

res %>% 
  write.table("Table SX - Loadings.tsv", sep = "\t", dec = ".", row.names = F)

Extended Data Table 5

Not rerun

cao <- Cacoa$new(data.object = con, # Major object
                 cell.groups = anno.major, 
                 ref.level = "CTRL", 
                 target.level = "DISEASE", 
                 sample.groups = con$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sget(1) %>%
                   {ifelse(. == "CTRL", "CTRL", "DISEASE")} %>% 
                   `names<-`(con$samples %>% 
                               names()),
                 sample.groups.palette = c(c("yellow", RColorBrewer::brewer.pal(n = 6, "Accent")[5:6]) %>% setNames(c("CTRL","PD","MSA"))))

cao$sample.groups <- con$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sget(1) %>% 
  `names<-`(con$samples %>% 
              names())

res <- cao$plotCellGroupSizes(show.significance = TRUE, 
                              filter.empty.cell.types = FALSE)$data %>% 
  group_by(group, variable) %>% 
  summarize(Min_proportion = min(value),
            Mean_proportion = mean(value),
            Max_proportion = max(value)) %>% 
  dplyr::rename(Condition = group,
                Celltype = variable)

res %>% 
  write.table("Table SX - Proportions, major annotation.tsv", sep = "\t", dec = ".", row.names = F)

Extended Data Table 6

Not rerun

anno.major <- qread("anno_major.qs")

anno.micro <- qread("anno_micro_pvm.qs") %>% 
  renameAnnotation("Steady-state","MIC_steady-state") %>% 
  renameAnnotation("Intermediate1","MIC_intermediate1") %>% 
  renameAnnotation("Intermediate2","MIC_intermediate2") %>% 
  renameAnnotation("Activated","MIC_activated")

anno.glia <- c("anno_astro.qs",
               "anno_oligo.qs",
               "anno_opc.qs") %>% 
  lapply(qread) %>% 
  Reduce(c, .) %>% 
  factor() %>% 
  renameAnnotation("Homeostatic_astrocytes", "AS_homeostatic") %>% 
  renameAnnotation("Reactive_astrocytes", "AS_reactive") %>% 
  renameAnnotation("Homeostatic_LINC01608", "OL_LINC01608") %>%
  renameAnnotation("Homeostatic_SLC5A11", "OL_SLC5A11") %>% 
  renameAnnotation("Reactive_SCGZ", "OL_SGCZ")

anno.neurons <- qread("anno_neurons.qs")

# Minor final
anno.minor <- anno.major[!anno.major %in% c("Inh. neurons", "Exc. neurons", "PVMs", "Astrocytes", "Oligodendrocytes", "OPCs", "Microglia")] %>% 
  {factor(c(., anno.neurons, anno.glia, anno.micro))} %>% 
  factor(levels = sort(levels(.)))

# Major final
anno.major %<>% .[!names(.) %in% names(anno.neurons)] %>% 
  {factor(c(., anno.neurons))} %>% 
  collapseAnnotation("MSN") %>% 
  collapseAnnotation("GABAergic") %>% 
  collapseAnnotation("GLUergic") %>% 
  renameAnnotation("MSN", "Medium spiny neurons") %>% 
  renameAnnotation("GABAergic", "GABAergic neurons") %>% 
  renameAnnotation("GLUergic", "GLUergic neurons") %>% 
  factor(levels = sort(levels(.)))
markers.minor <- con.major$getDifferentialGenes(groups = anno.minor, 
                                                z.threshold = 1, 
                                                upregulated.only = T, 
                                                append.specificity.metrics = T, 
                                                append.auc = T)

markers.minor %>% 
  lapply(filter, PAdj <= 0.05) %>% 
  bind_rows(.id = "Celltype") %>% 
  write.table("Table SX - Celltype markers, minor annotation.tsv", sep = "\t", dec = ".", row.names = F)

Extended Data Table 7

Not rerun

cao <- Cacoa$new(data.object = con, 
                 cell.groups = anno.minor, 
                 ref.level = "CTRL", 
                 target.level = "DISEASE", 
                 sample.groups = con$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sget(1) %>%
                   {ifelse(. == "CTRL", "CTRL", "DISEASE")} %>% 
                   `names<-`(con$samples %>% 
                               names()),
                 sample.groups.palette = c(c("yellow", RColorBrewer::brewer.pal(n = 6, "Accent")[5:6]) %>% setNames(c("CTRL","PD","MSA"))))

cao$sample.groups <- con$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sget(1) %>% 
  `names<-`(con$samples %>% 
              names())

res <- cao$plotCellGroupSizes(show.significance = TRUE, 
                              filter.empty.cell.types = FALSE)$data %>% 
  group_by(group, variable) %>% 
  summarize(Min_proportion = min(value),
            Mean_proportion = mean(value),
            Max_proportion = max(value)) %>% 
  dplyr::rename(Condition = group,
                Celltype = variable)

res %>% 
  write.table("Table SX - Proportions, minor annotation.tsv", sep = "\t", dec = ".", row.names = F)

Extended Data Table 8

Not rerun

First, correct OPCs/COPs for age covariate according to scITD calculations

cao_msa <- qread("cao_opc_msa.qs", nthreads = 10)
cao_msa$plot.theme <- theme_bw()
cao_pd <- qread("cao_opc_pd.qs", nthreads = 10)
cao_pd$plot.theme <- theme_bw()
cao_dis <- qread("cao_opc_dis.qs", nthreads = 10)
cao_dis$plot.theme <- theme_bw()

metadata.all <- read.delim("metadata.tsv")

meta.df.msa <- metadata.all %>% 
  filter(sample %in% names(cao_msa$data.object$samples)) %>% 
  tibble::column_to_rownames("sample") %>% 
  select(age)

meta.df.pd <- metadata.all %>% 
  filter(sample %in% names(cao_pd$data.object$samples)) %>% 
  tibble::column_to_rownames("sample") %>% 
  select(age)

meta.df.dis <- metadata.all %>% 
  filter(sample %in% names(cao_dis$data.object$samples)) %>% 
  tibble::column_to_rownames("sample") %>% 
  select(age)

all.genes_msa <- cao_msa$data.object$samples %>% 
  lget("misc") %>% 
  lget("rawCounts") %>% 
  lapply(colnames) %>% 
  Reduce(union, .)

genes.to.omit_msa <- all.genes_msa %>% 
  .[grepl("MT-|RPL|RPS", .)]

all.genes_pd <- cao_pd$data.object$samples %>% 
  lget("misc") %>% 
  lget("rawCounts") %>% 
  lapply(colnames) %>% 
  Reduce(union, .)

genes.to.omit_pd <- all.genes_pd %>% 
  .[grepl("MT-|RPL|RPS", .)]

all.genes_dis <- cao_dis$data.object$samples %>% 
  lget("misc") %>% 
  lget("rawCounts") %>% 
  lapply(colnames) %>% 
  Reduce(union, .)

genes.to.omit_dis <- all.genes_dis %>% 
  .[grepl("MT-|RPL|RPS", .)]

cao_msa$estimateDEPerCellType(covariates = meta.df.msa, name = "de_age", genes.to.omit = genes.to.omit_msa)
cao_pd$estimateDEPerCellType(covariates = meta.df.pd, name = "de_age", genes.to.omit = genes.to.omit_pd)
cao_dis$estimateDEPerCellType(covariates = meta.df.dis, name = "de_age", genes.to.omit = genes.to.omit_dis)

cao_msa$estimateOntology("GSEA", name = "gsea_age", de.name = "de_age", org.db = org.Hs.eg.db::org.Hs.eg.db)
cao_pd$estimateOntology("GSEA", name = "gsea_age", de.name = "de_age", org.db = org.Hs.eg.db::org.Hs.eg.db)
cao_dis$estimateOntology("GSEA", name = "gsea_age", de.name = "de_age", org.db = org.Hs.eg.db::org.Hs.eg.db)

qsave(cao_msa, "cao_opc_msa.qs", nthreads = 10)
qsave(cao_pd, "cao_opc_pd.qs", nthreads = 10)
qsave(cao_dis, "cao_opc_dis.qs", nthreads = 10)
go.list <- dir("cacoa/", full.names = T)[-c(4:7, 15:18, 22, 26)] %>% 
  lapply(qread, nthreads = 10) %>% 
  lapply(purrr::pluck, "test.results") %>% 
  lapply(\(x) if ("gsea_age" %in% names(x)) x$gsea_age else x$gsea) %>% # To include age-corrected calculations for OPCs
  setNames(dir("cacoa/")[-c(4:7, 15:18, 22, 26)])

go.list %>% 
  lget("res") %>% 
  lapply(lapply, lapply, slot, "result") %>% 
  lapply(lapply, data.table::rbindlist, idcol = "Category") %>% 
  lapply(data.table::rbindlist, idcol = "Celltype") %>% 
  data.table::rbindlist(idcol = "Comparison") %>% 
  mutate(Comparison = sapply(Comparison, \(comp) {
    if (grepl("dis", comp)) "PD vs MSA" else if (grepl("msa", comp)) "CTRL vs MSA" else "CTRL vs PD"
  })) %>% 
  filter(p.adjust <= 0.05) %>% 
  write.table("GSEA.csv", sep = ",", dec = ".", row.names = F)

Extended Data Table 9

Not rerun

de.list <- dir("cacoa/", full.names = T)[-c(4:7, 15:18, 22, 26)] %>% 
  lapply(qread, nthreads = 10) %>% 
  lapply(purrr::pluck, "test.results") %>% 
  lapply(\(x) if ("de_age" %in% names(x)) x$de_age else x$de) %>% # To include results from age-corrected calculations for OPCs
  setNames(dir("cacoa/")[-c(4:7, 15:18, 22, 26)])

de.list %>% 
  lapply(lget, "res") %>% 
  lapply(data.table::rbindlist, idcol = "Celltype") %>% 
  lapply(select, Celltype, baseMean, log2FoldChange, lfcSE, stat, pvalue, padj, Gene, Z, Za, CellFrac, SampleFrac) %>% 
  data.table::rbindlist(idcol = "Comparison") %>% 
  mutate(Comparison = sapply(Comparison, \(comp) {
    if (grepl("dis", comp)) "PD vs MSA" else if (grepl("msa", comp)) "CTRL vs MSA" else "CTRL vs PD"
  })) %>% 
  filter(padj <= 0.05) %>% 
  write.table("DEGs.csv", sep = ",", dec = ".", row.names = F)

Extended Data Table 10

Not rerun. We need the res.filter object from calculation of smoothed heatmap for microglia activation trajectory.

go <- clusterProfiler::enrichGO(names(res.filter), 
                                OrgDb = "org.Hs.eg.db", 
                                keyType = "SYMBOL", 
                                universe = colnames(con$getJointCountMatrix()))

go@result %>% 
  filter(pvalue <= 0.05) %>% 
  write.table("Microglia_pseudotime_activation-lineage_GO.tsv", sep = "\t", row.names = F)

Extended Data Table 11

Not rerun. We need the Smooth object for the microglia to PVM trajectory.

go.list <- list(repressed = rownames(Smooth)[1:which(rownames(Smooth) == "EPB41L2")],
                induced = rownames(Smooth)[(which(rownames(Smooth) == "EPB41L2")+1):nrow(Smooth)]) %>% 
  lapply(clusterProfiler::enrichGO,
         OrgDb = "org.Hs.eg.db", 
         keyType = "SYMBOL", 
         universe = colnames(con$getJointCountMatrix()))

go.list %>% 
  lapply(\(x) x@result) %>% 
  lapply(filter, pvalue <= 0.05) %>% 
  data.table::rbindlist(idcol = "geneset") %>% 
  write.table("Microglia_pseudotime_PVM-lineage_GO.tsv", sep = "\t", row.names = F)

Extended Data Table 14

Not rerun. We use the publicly available data from Rydbirk et al..

# Create universes
dat.all <- read.delim("CSF-Sv_Soluble-frac__Report.tsv")

universe.gene <- dat.all %>% 
  pull(PG.Genes) %>% 
  unique()

universe.prot <- dat.all %>% 
  pull(PG.UniProtIds)

# DEPs
dat <- read.delim("Soluble_fraction.txt")

# KEGG, MSA

kegg.res <- dat %>% 
  filter(Disease.group == "MSA") %>% 
  pull(UniProt.ID) %>% 
  clusterProfiler::enrichKEGG(keyType = "uniprot", pvalueCutoff = 0.2, universe = universe.prot, organism = "hsa")

# KEGG, PD

kegg.res.pd <- dat %>% 
  filter(Disease.group == "PD") %>% 
  pull(UniProt.ID) %>% 
  clusterProfiler::enrichKEGG(keyType = "uniprot", pvalueCutoff = 0.2, universe = universe.prot, organism = "hsa")

# Write table

list(MSA = kegg.res, PD = kegg.res.pd) %>% 
  lapply(getElement, "result") %>% 
  lapply(dplyr::select, Description, GeneRatio, BgRatio, pvalue, p.adjust, ID, geneID) %>% 
  bind_rows(.id = "Disease") %>% 
  write.table("TableS14_KEGG.tsv", sep = "\t", row.names = F)

Session info

Time to knit

Sys.time() - tt
## Time difference of 26.09581 mins
sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.3 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /opt/intel/oneapi/mkl/2025.3/lib/libmkl_gf_lp64.so.2;  LAPACK version 3.12.1
## 
## Random number generation:
##  RNG:     L'Ecuyer-CMRG 
##  Normal:  Inversion 
##  Sample:  Rejection 
##  
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggraph_2.2.2                scITD_1.0.4                
##  [3] gamm4_0.2-7                 mgcv_1.9-4                 
##  [5] nlme_3.1-168                lme4_1.1-38                
##  [7] slingshot_2.16.0            TrajectoryUtils_1.16.1     
##  [9] SingleCellExperiment_1.30.1 SummarizedExperiment_1.38.1
## [11] Biobase_2.68.0              GenomicRanges_1.60.0       
## [13] GenomeInfoDb_1.44.3         IRanges_2.42.0             
## [15] S4Vectors_0.46.0            BiocGenerics_0.54.1        
## [17] generics_0.1.4              MatrixGenerics_1.20.0      
## [19] matrixStats_1.5.0           princurve_2.1.6            
## [21] rstatix_0.7.3               stringr_1.6.0              
## [23] ComplexHeatmap_2.24.1       circlize_0.4.17            
## [25] ggrepel_0.9.6               CRMetrics_0.3.2            
## [27] RColorBrewer_1.1-3          ggpmisc_0.6.3              
## [29] ggpp_0.6.0                  ggforce_0.5.0              
## [31] reshape2_1.4.5              STRINGdb_2.20.0            
## [33] ggpubr_0.6.2                cowplot_1.2.0              
## [35] ggsci_4.2.0                 ggplot2_4.0.2              
## [37] qs_0.27.3                   scHelper_0.0.5             
## [39] sccore_1.0.5                cacoa_0.4.0                
## [41] dplyr_1.2.0                 magrittr_2.0.4             
## [43] conos_1.5.2                 igraph_2.2.1               
## [45] Matrix_1.7-4               
## 
## loaded via a namespace (and not attached):
##   [1] rTensor_1.4.9             R.methodsS3_1.8.2        
##   [3] dichromat_2.0-0.1         Rmisc_1.5.1              
##   [5] Biostrings_2.76.0         vctrs_0.7.1              
##   [7] ggtangle_0.1.1            RApiSerialize_0.1.4      
##   [9] digest_0.6.39             png_0.1-8                
##  [11] shape_1.4.6.1             registry_0.5-1           
##  [13] combinat_0.0-8            magick_2.9.0             
##  [15] MASS_7.3-65               foreach_1.5.2            
##  [17] httpuv_1.6.16             qvalue_2.40.0            
##  [19] withr_3.0.2               ggrastr_1.0.2            
##  [21] psych_2.5.6               xfun_0.56                
##  [23] ggfun_0.2.0               survival_3.8-6           
##  [25] memoise_2.0.1             ggbeeswarm_0.7.3         
##  [27] gson_0.1.0                clusterProfiler_4.16.0   
##  [29] MatrixModels_0.5-4        tidytree_0.4.7           
##  [31] GlobalOptions_0.1.3       gtools_3.9.5             
##  [33] pbapply_1.7-4             R.oo_1.27.1              
##  [35] Formula_1.2-5             KEGGREST_1.48.1          
##  [37] promises_1.5.0            otel_0.2.0               
##  [39] httr_1.4.7                hash_2.2.6.3             
##  [41] stringfish_0.18.0         rstudioapi_0.18.0        
##  [43] UCSC.utils_1.4.0          DOSE_4.2.0               
##  [45] polyclip_1.10-7           ca_0.71.1                
##  [47] TSCAN_1.46.0              GenomeInfoDbData_1.2.14  
##  [49] quadprog_1.5-8            SparseArray_1.8.1        
##  [51] xtable_1.8-4              doParallel_1.0.17        
##  [53] evaluate_1.0.5            S4Arrays_1.8.1           
##  [55] irlba_2.3.7               colorspace_2.1-2         
##  [57] polynom_1.4-1             later_1.4.5              
##  [59] viridis_0.6.5             ggtree_3.16.3            
##  [61] lattice_0.22-7            SparseM_1.84-2           
##  [63] triebeard_0.4.1           pillar_1.11.1            
##  [65] iterators_1.0.14          caTools_1.18.3           
##  [67] compiler_4.5.2            RSpectra_0.16-2          
##  [69] stringi_1.8.7             TSP_1.2.6                
##  [71] minqa_1.2.8               plyr_1.8.9               
##  [73] drat_0.2.5                crayon_1.5.3             
##  [75] abind_1.4-8               gridGraphics_0.5-1       
##  [77] chron_2.3-62              locfit_1.5-9.12          
##  [79] org.Hs.eg.db_3.21.0       graphlayouts_1.2.2       
##  [81] bit_4.6.0                 fastmatch_1.1-8          
##  [83] tradeSeq_1.22.0           codetools_0.2-20         
##  [85] bslib_0.9.0               GetoptLong_1.1.0         
##  [87] mime_0.13                 splines_4.5.2            
##  [89] Rcpp_1.1.1                quantreg_6.1             
##  [91] sparseMatrixStats_1.20.0  pagoda2_1.0.13           
##  [93] brew_1.0-10               N2R_1.0.3                
##  [95] knitr_1.51                blob_1.2.4               
##  [97] utf8_1.2.6                clue_0.3-66              
##  [99] fs_1.6.6                  confintr_1.0.2           
## [101] DelayedMatrixStats_1.30.0 Rdpack_2.6.4             
## [103] ggsignif_0.6.4            ggplotify_0.1.3          
## [105] tibble_3.3.1              sqldf_0.4-11             
## [107] statmod_1.5.1             tweenr_2.0.3             
## [109] pkgconfig_2.0.3           tools_4.5.2              
## [111] Rook_1.2                  cachem_1.1.0             
## [113] rbibutils_2.3             RSQLite_2.4.5            
## [115] viridisLite_0.4.2         DBI_1.2.3                
## [117] fastmap_1.2.0             rmarkdown_2.30           
## [119] scales_1.4.0              pbmcapply_1.5.1          
## [121] ica_1.0-3                 broom_1.0.11             
## [123] sass_0.4.10               patchwork_1.3.2          
## [125] carData_3.0-5             farver_2.1.2             
## [127] reformulas_0.4.3.1        tidygraph_1.3.1          
## [129] gsubfn_0.7                yaml_2.3.12              
## [131] cli_3.6.5                 purrr_1.2.1              
## [133] lifecycle_1.0.5           uwot_0.2.4               
## [135] backports_1.5.0           BiocParallel_1.42.2      
## [137] gtable_0.3.6              rjson_0.2.23             
## [139] parallel_4.5.2            ape_5.8-1                
## [141] SnowballC_0.7.1           limma_3.64.3             
## [143] jsonlite_2.0.0            edgeR_4.6.3              
## [145] seriation_1.5.8           bitops_1.0-9             
## [147] bit64_4.6.0-1             Rtsne_0.17               
## [149] yulab.utils_0.2.3         coda.base_1.0.3          
## [151] proto_1.0.0               urltools_1.7.3.1         
## [153] RcppParallel_5.1.11-1     jquerylib_0.1.4          
## [155] GOSemSim_2.34.0           R.utils_2.13.0           
## [157] lazyeval_0.2.2            shiny_1.12.1             
## [159] htmltools_0.5.9           enrichplot_1.28.4        
## [161] GO.db_3.21.0              rappdirs_0.3.4           
## [163] glue_1.8.0                XVector_0.48.0           
## [165] treeio_1.32.0             mclust_6.1.2             
## [167] dunn.test_1.3.6           mnormt_2.1.1             
## [169] RMTstat_0.3.1             gridExtra_2.3            
## [171] boot_1.3-32               R6_2.6.1                 
## [173] tidyr_1.3.2               DESeq2_1.48.2            
## [175] gplots_3.3.0              labeling_0.4.3           
## [177] cluster_2.1.8.1           FSA_0.10.1               
## [179] aplot_0.2.9               nloptr_2.2.1             
## [181] DelayedArray_0.34.1       tidyselect_1.2.1         
## [183] vipor_0.4.7               plotrix_3.8-13           
## [185] dendsort_0.3.4            car_3.1-3                
## [187] AnnotationDbi_1.70.0      leidenAlg_1.1.5          
## [189] fastICA_1.2-7             KernSmooth_2.23-26       
## [191] S7_0.2.1                  data.table_1.18.0        
## [193] fgsea_1.34.2              rlang_1.1.7              
## [195] lsa_0.73.4                ggnewscale_0.5.2         
## [197] Cairo_1.7-0               beeswarm_0.4.0